diff --git a/netmap/LINUX/Makefile b/netmap/LINUX/Makefile deleted file mode 100644 index 9d3a3cf..0000000 --- a/netmap/LINUX/Makefile +++ /dev/null @@ -1,169 +0,0 @@ -# To build external modules, you must have a prebuilt kernel available -# that contains the configuration and header files used in the build. -# go in the kernel directory and do a -# make oldconfig; make scripts; make prepare -# or make defconfig; make scripts; make prepare -# - - - -# list of objects for this module -# -# objects whose source file is in ../sys/dev/netmap -remoteobjs := netmap.o netmap_mem2.o \ - netmap_generic.o netmap_mbq.o netmap_vale.o \ - netmap_offloadings.o netmap_pipe.o -# all objects -netmap_lin-objs := $(remoteobjs) netmap_linux.o - -obj-$(CONFIG_NETMAP) = netmap_lin.o - -ifndef NODRIVERS -# list of modules to be built (actually also forcedeth and r8169) -MOD_LIST:= CONFIG_E1000=m CONFIG_E1000E=m \ - CONFIG_IXGBE=m CONFIG_IGB=m \ - CONFIG_BNX2X=m CONFIG_MLX4=m \ - CONFIG_VIRTIO_NET=m -obj-m += $(O_DRIVERS) -GET_DRIVERS := get-drivers -else -MOD_LIST:= -endif - -# DRIVER_SRCS names of the driver sources is only used to -# clean files that we copied. -DRIVER_SRCS = r8169.c forcedeth.c e1000/ e1000e/ ixgbe/ igb/ -DRIVER_SRCS += bnx2x/ mellanox/ mlx4/ virtio_net.c - -# _DRV_SUBDIRS contains the subdirs with driver sources. -# In old linuxes everything is under drivers/net, newer versions -# have them in source/drivers/net/ethernet/$(manufacturer) - -_DRV_SUBDIRS= nvidia realtek intel broadcom . .. - -# The following commands are needed to build the modules as out-of-tree, -# in fact the kernel sources path must be specified. - -PWD ?= $(CURDIR) -M:=$(PWD) - -# Additional compile flags (e.g. header location) -EXTRA_CFLAGS := -I$(M) -I$(M)/../sys -I$(M)/../sys/dev -DCONFIG_NETMAP -EXTRA_CFLAGS += -Wno-unused-but-set-variable - -# We use KSRC for the kernel configuration and sources. -# If the sources are elsewhere, then use SRC to point to them. -KSRC ?= /lib/modules/$(shell uname -r)/build -SRC ?= $(KSRC) - -# extract version number. -# version.h can be in two different places. -# NOTE- A.B.C translates to aXXYY where XXYY are hex -LIN_VER = $(shell V=linux/version.h; G=. ; \ - [ -f $(KSRC)/include/$${V} ] || G=generated/uapi ;\ - grep LINUX_VERSION_CODE $(KSRC)/include/$${G}/linux/version.h | \ - awk '{printf "%03x%02x", $$3/256, $$3%256} ') - -# produce a list of applicable patches for this version -PATCHES := $(shell \ - cd $(PWD)/patches; ls diff--* | awk -v v=$(LIN_VER) -F -- \ - '{ if ((!$$3 || $$3 <= v) && (!$$4 || v < $$4)) print $$0; }') - -# source drivers to copy. Names derived from the patches -S_DRIVERS := $(shell \ - cd $(PWD)/patches; ls diff--* | awk -v v=$(LIN_VER) -F -- \ - '{ if ((!$$3 || $$3 <= v) && (!$$4 || v < $$4)) print $$2 }' ) - -# actual drivers after copy and patch -DRIVERS = $(shell [ "$(PATCHES)" != "" ] && ls -dAp \ - `echo $(PATCHES:diff--%=%) | sed -r 's/--[0-9a-f-]+//g'` 2> /dev/null) - -# Compile v1000 (vhost porting to e1000) only if -# the LIN_VER >= 3.8.0, because we don't want to deal -# with backporting problems for v1000. -ifeq ($(word 1, $(sort 30800 $(LIN_VER))), 30800) -CONFIG_V1000:=m -else -CONFIG_V1000:=n -endif - -CONFIG_V1000:=n # force disable by now - -obj-$(CONFIG_V1000) += vhost-port/ - - -all: build - -build: $(GET_DRIVERS) - $(MAKE) -C $(KSRC) M=$(PWD) CONFIG_NETMAP=m $(MOD_LIST) \ - EXTRA_CFLAGS='$(EXTRA_CFLAGS)' \ - O_DRIVERS="$(DRIVERS:%.c=%.o)" modules - @ls -l `find . 
-name \*.ko` - - -test: - @echo "version $(LIN_VER)" - @echo "patches $(PATCHES)" - @echo "drivers $(DRIVERS)" - -clean: - -@ $(MAKE) -C $(KSRC) M=$(PWD) clean 2> /dev/null - -@ (rm -rf $(DRIVER_SRCS) *.orig *.rej *.ko *.o .*.d \ - .tmp_versions *.mod.c modules.order \ - Module.symvers .*.cmd get-drivers ) - -# the source is not here so we need to specify a dependency -define remote_template -$$(obj)/$(1): $$(M)/../sys/dev/netmap/$(1:.o=.c) - $$(call cmd,cc_o_c) - $$(call cmd,modversions) -endef -$(foreach o,$(remoteobjs),$(eval $(call remote_template,$(o)))) - -#-- copy and patch initial files -# The location changes depending on the OS version, so ... -get-drivers: - -@( \ - if [ -d "$(DRIVER_SRC)" ] ; then \ - cd "$(DRIVER_SRC)"; s=.; what="`ls -dp *`" ; \ - else \ - cd $(SRC); [ -d source ] && cd source ; \ - cd drivers/net; s=. ; \ - [ -d ethernet ] && cd ethernet && s="$(_DRV_SUBDIRS)" ; \ - what="$(S_DRIVERS)" ; \ - fi ; \ - echo "LIN_VER $(LIN_VER)" ; \ - [ "$${what}" = "" ] && echo "-- NO DRIVERS --" && return; \ - echo "---- Building from `pwd`"; \ - echo "---- copying $${what} ---" ; \ - what="$${what} cnic_if.h"; \ - for i in $$s; do (cd $$i ; \ - echo " From `pwd` :"; \ - ls -ldp $${what} 2> /dev/null | sed 's/^/ /' ; \ - cp -Rp $${what} $(PWD) 2>/dev/null ); \ - done ; \ - cd $(PWD) ; \ - for i in $(PATCHES) ; \ - do echo "** patch with $$i"; \ - patch --posix --quiet --force -p1 < patches/$$i; \ - done ; \ - echo "Building the following drivers: $(S_DRIVERS)" ) - @touch get-drivers - - -test3: - @echo "from $(PATCHES) -- to $(MYDRIVERS)" - @echo "Drivers is $(DRIVERS)" - @echo "Actually have `ls -d $(DRIVERS) 2> /dev/null`" - -# compute the diffs for the original files -diffs: - @for i in `find . -name \*.orig`; do \ - diff -urp $$i $${i%.orig} ; \ - done - -apps: - (cd ../examples; $(MAKE)) - -+%: - @echo $($*) diff --git a/netmap/LINUX/README b/netmap/LINUX/README deleted file mode 100644 index 313b59f..0000000 --- a/netmap/LINUX/README +++ /dev/null @@ -1,154 +0,0 @@ -# $Id: README 10863 2012-04-11 17:10:39Z luigi $ - -NETMAP FOR LINUX ----------------- - -This directory contains a version of the "netmap" and "VALE" code for Linux. - -Netmap is a BSD-licensed framework that supports line-rate direct packet -I/O even on 10GBit/s interfaces (14.88Mpps) with limited system load, -and includes a libpcap emulation library to port applications. - -See - - http://info.iet.unipi.it/~luigi/netmap/ - -for more details. There you can also find the latest versions -of the code and documentation as well as pre-built TinyCore -images based on linux 3.0.3 and containing the netmap modules -and some test applications. - -This version supports r8169, ixgbe, igb, e1000, e1000e and forcedeth. - -Netmap relies on a kernel module (netmap_lin.ko) and slightly modified -device drivers. Userspace programs can use the native API (documented -in netmap.4) or a libpcap emulation library. - -The FreeBSD and Linux versions share the same codebase, which -is located in ../sys . For Linux we use some additional glue code, -(bsd_glue.h). - -Device drivers are taken directly from the Linux distributions, -and patched using the files in the patches/ directory. -Common driver modifications are in the .h files in this directory. - - -HOW TO BUILD THE CODE ---------------------- - -1. make sure you have kernel sources/headers matching your installed system - -2. do the following - make clean; make KSRC=/usr/src/linux-kernel-source-or-headers - this produces ./netmap_lin.ko and other kernel modules. - -3. 
to build sample applications, run - (cd ../examples; make ) - (you will need the pthreads and libpcap-dev packages to build them) - -If you want support for additional drivers please have a look at -ixgbe_netmap_linux.h and the patches in patches/ -The patch file are named as diff--DRIVER--LOW--HIGH--otherstuff -where DRIVER is the driver name to patch, LOW and HIGH are the -versions to which the patch applies (LOW included, HIGH excluded, so -diff--r8169.c--20638--30300--ok applies from 2.6.38 to 3.3.0 (excluded) - -HOW TO USE THE CODE -------------------- - - REMEMBER - THIS IS EXPERIMENTAL CODE WHICH MAY CRASH YOUR SYSTEM. - USE IT AT YOUR OWN RISk. - -Whether you built your own modules, or are using the prebuilt -TinyCore image, the following steps can be used for initial testing: - -1. unload any modules for the network cards you want to use, e.g. - sudo rmmod ixgbe - sudo rmmod e1000 - ... - -2. load netmap and device driver module - sudo insmod ./netmap_lin.ko - sudo insmod ./ixgbe/ixgbe.ko - sudo insmod ./e1000/e1000.ko - ... - -3. turn the interface(s) up - - sudo ifconfig eth0 up # and same for others - -4. Run test applications -- as an example, pkt-gen is a raw packet - sender/receiver which can do line rate on a 10G interface - - # send about 500 million packets of 60 bytes each. - # wait 5s before starting, so the link can go up - sudo pkt-gen -i eth0 -f tx -n 500111222 -l 60 -w 5 - # you should see about 14.88 Mpps - - sudo pkt-gen -i eth0 -f rx # act as a receiver - - -COMMON PROBLEMS ----------------- - -* switching in/out of netmap mode causes the link to go down and up. - If your card is connected to a switch with spanning tree enabled, - the switch will likely MUTE THE LINK FOR 10 SECONDS while it is - detecting the new topology. Either disable the spanning tree on - the switch or use long pauses before sending data; - -* Not all cards can do line rate no matter how fast is your software or - CPU. Several have hardware limitations that prevent reaching the peak - speed, especially for small packet sizes. Examples: - - - ixgbe cannot receive at line rate with packet sizes that are - not multiple of 64 (after CRC stripping). - This is especially evident with minimum-sized frames (-l 60 ) - - - some of the low-end 'e1000' cards can send 1.2 - 1.3Mpps instead - of the theoretical maximum (1.488Mpps) - - - the 'realtek' cards seem unable to send more than 450-500Kpps - even though they can receive at least 1.1Mpps - -* if the link is not up when the packet generator starts, you will - see frequent messages about a link reset. While we work on a fix, - use the '-w' argument on the generator to specify a longer timeout - -* the ixgbe driver (and perhaps others) is severely slowed down if the - remote party is senting flow control frames to slow down traffic. - If that happens try to use the ethtool command to disable flow control. - - -REVISION HISTORY ------------------ - -20120813 - updated distribution using common code for FreeBSD and Linux, - and inclusion of drivers from the linux source tree - -20120322 - fixed the 'igb' driver, now it can send and receive correctly - (the problem was in netmap_rx_irq() so it might have affected - other multiqueue cards). - Also tested the 'r8169' in transmit mode. - Added comments on switches and spanning tree. - -20120217 - initial version. Only ixgbe, e1000 and e1000e are working. - Other drivers (igb, r8169, forcedeth) are supplied only as a - proof of concept. 
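(Worked example, not part of the original sources: the LOW/HIGH bounds in the patch
names above use the same encoding as the LIN_VER variable computed in the Makefile,
i.e. a kernel version A.B.C is printed with printf "%03x%02x" from (A*256+B) and C,
and 99999 stands for "no upper bound". The lin_ver() helper below is purely
illustrative; it is not a function in the netmap tree.)

    #include <stdio.h>

    /* Mimics the Makefile's LIN_VER rule: LINUX_VERSION_CODE is
     * (A<<16 | B<<8 | C); dividing by 256 leaves A*256+B, and the
     * remainder is C.  Formatted "%03x%02x" this yields the 5-hex-digit
     * tag used in the patch names (e.g. 3.3.0 -> "30300"). */
    static void lin_ver(unsigned code, char *buf, size_t len)
    {
        snprintf(buf, len, "%03x%02x", code / 256, code % 256);
    }

    int main(void)
    {
        char buf[8];
        unsigned code = (3 << 16) | (3 << 8) | 0;   /* kernel 3.3.0 */

        lin_ver(code, buf, sizeof(buf));
        printf("3.3.0 -> %s\n", buf);               /* prints 30300 */
        return 0;
    }

Read this way, a name such as diff--e1000e--30400--30900 should cover kernels from
3.4.0 up to, but excluding, 3.9.0, and diff--e1000--20620--99999 applies from 2.6.32
onwards.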
- -DETAILS --------- -+ igb: on linux 3.2 and above the igb driver moved to split buffers, - and netmap was not updated until end of june 2013. - Symptoms were inability to receive short packets. - -+ there are reports of ixgbe and igb unable to read packets. - We are unable to reproduce the problem. - - Ubuntu 12.04 LTS 3.5.0-25-generic. igb read problems ? - - 3.2.0-32-generic with 82598 not working - -+ if_e1000_e uses regular descriptor up 3.1 at least - 3.2.32 is reported to use extended descriptors - (in my repo updated at -r 11975) - diff --git a/netmap/LINUX/archlinux/PKGBUILD b/netmap/LINUX/archlinux/PKGBUILD deleted file mode 100644 index 8d951ea..0000000 --- a/netmap/LINUX/archlinux/PKGBUILD +++ /dev/null @@ -1,74 +0,0 @@ -# See http://wiki.archlinux.org/index.php/VCS_PKGBUILD_Guidelines -# for more information on packaging from GIT sources. - -# Maintainer: Vincenzo Maffione -pkgname=netmap -pkgver=2.0 -pkgrel=1 -pkgdesc="Netmap is a framework for high speed network packet I/O." -arch=('any') -url="http://info.iet.unipi.it/~luigi/netmap" -license=('BSD') -groups=() -depends=('linux' 'glibc') -makedepends=('git' 'sed' 'gzip' 'linux-headers') -provides=() -conflicts=() -replaces=() -backup=() -options=() -install="netmap.install" -source=() -noextract=() -md5sums=() #generate with 'makepkg -g' - -_gitroot="https://v.maffione@code.google.com/p/netmap/" -_gitname="netmap" - -build() { - cd "$srcdir" - msg "Connecting to GIT server...." - - if [[ -d "$_gitname" ]]; then - cd "$_gitname" && git pull origin - msg "The local files are updated." - else - git clone "$_gitroot" "$_gitname" - fi - - msg "GIT checkout done or server timeout" - msg "Starting build..." - - rm -rf "$srcdir/$_gitname-build" - git clone "$srcdir/$_gitname" "$srcdir/$_gitname-build" - cd "$srcdir/$_gitname-build" - - # Build the netmap kernel module - cd "$srcdir/$_gitname-build/LINUX" - make || return 1 - # Build pkt-gen and vale-ctl - cd "$srcdir/$_gitname-build/examples" - make pkt-gen vale-ctl || return 1 -} - -package() { - # Compute the version numbers of the running kernel - KVER1=$(uname -r) - KVER2=$(uname -r | sed 's/\.[0-9]\+-[0-9]\+//') - - # Install the netmap module into the extramodules-VERSION directory - mkdir -p "$pkgdir/lib/modules/extramodules-${KVER2}" - cp "$srcdir/$_gitname-build/LINUX/netmap_lin.ko" "$pkgdir/lib/modules/extramodules-${KVER2}" - - # Install pkt-gen and valectl into /usr/bin - mkdir -p "$pkgdir/usr/bin" - cp "$srcdir/$_gitname-build/examples/pkt-gen" "$pkgdir/usr/bin" - cp "$srcdir/$_gitname-build/examples/vale-ctl" "$pkgdir/usr/bin" - - # Install the netmap man page - mkdir -p "$pkgdir/usr/share/man/man4" - cp "$srcdir/$_gitname-build/share/man/man4/netmap.4" "$pkgdir/usr/share/man/man4" - gzip "$pkgdir/usr/share/man/man4/netmap.4" -} - -# vim:set ts=2 sw=2 et: diff --git a/netmap/LINUX/archlinux/netmap.install b/netmap/LINUX/archlinux/netmap.install deleted file mode 100644 index d8951c2..0000000 --- a/netmap/LINUX/archlinux/netmap.install +++ /dev/null @@ -1,20 +0,0 @@ -post_common() { - depmod -a -} - -## arg 1: the new package version -post_install() { - post_common -} - -## arg 1: the new package version -## arg 2: the old package version -post_upgrade() { - post_common -} - -## arg 1: the old package version -post_remove() { - post_common -} - diff --git a/netmap/LINUX/bsd_glue.h b/netmap/LINUX/bsd_glue.h deleted file mode 100644 index 32d6aea..0000000 --- a/netmap/LINUX/bsd_glue.h +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo - 
Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * glue code to build the netmap bsd code under linux. - * Some of these tweaks are generic, some are specific for - * character device drivers and network code/device drivers. - */ - -#ifndef _BSD_GLUE_H -#define _BSD_GLUE_H - -/* a set of headers used in netmap */ -#include -#include // ACCESS_ONCE() - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include // eth_type_trans -#include -#include -#include // virt_to_phys -#include -#include // msleep -#include // skb_copy_to_linear_data_offset - -#include // virt_to_phys -#include - -#define printf(fmt, arg...) 
printk(KERN_ERR fmt, ##arg) -#define KASSERT(a, b) BUG_ON(!(a)) - -/*----- support for compiling on older versions of linux -----*/ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21) -#define HRTIMER_MODE_REL HRTIMER_REL -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) -#define skb_copy_from_linear_data_offset(skb, offset, to, copy) \ - memcpy(to, (skb)->data + offset, copy) - -#define skb_copy_to_linear_data_offset(skb, offset, from, copy) \ - memcpy((skb)->data + offset, from, copy) - -#define skb_copy_to_linear_data(skb, from, copy) \ - memcpy((skb)->data, from, copy) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) -#define ACCESS_ONCE(x) (x) -#define uintptr_t unsigned long -#define skb_get_queue_mapping(m) (0) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25) -/* Forward a hrtimer so it expires after the hrtimer's current now */ -static inline u64 hrtimer_forward_now(struct hrtimer *timer, - ktime_t interval) -{ - return hrtimer_forward(timer, timer->base->get_time(), interval); -} -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27) -typedef unsigned long phys_addr_t; -extern struct net init_net; -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) // XXX -#define netdev_ops hard_start_xmit -struct net_device_ops { - int (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); -}; -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) // XXX 31 -#define netdev_tx_t int -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) -#define usleep_range(a, b) msleep((a)+(b)+999) -#endif /* up to 2.6.35 */ - -/*----------- end of LINUX_VERSION_CODE dependencies ----------*/ - -/* Type redefinitions. XXX check them */ -typedef void * bus_dma_tag_t; -typedef void * bus_dmamap_t; -typedef int bus_size_t; -typedef int bus_dma_segment_t; -typedef void * bus_addr_t; -#define vm_paddr_t phys_addr_t -/* XXX the 'off_t' on Linux corresponds to a 'long' */ -#define vm_offset_t uint32_t -#define vm_ooffset_t unsigned long -struct thread; - -/* endianness macros/functions */ -#define le16toh le16_to_cpu -#define le32toh le32_to_cpu -#define le64toh le64_to_cpu -#define be16toh be16_to_cpu -#define be32toh be32_to_cpu -#define be64toh be64_to_cpu -#define htole32 cpu_to_le32 -#define htole64 cpu_to_le64 -#define htobe16 cpu_to_be16 -#define htobe32 cpu_to_be32 - -#include -#define time_second (jiffies_to_msecs(jiffies) / 1000U ) - -#define bzero(a, len) memset(a, 0, len) - -/* Atomic variables. */ -#define NM_ATOMIC_TEST_AND_SET(p) test_and_set_bit(0, (p)) -#define NM_ATOMIC_CLEAR(p) clear_bit(0, (p)) - -#define NM_ATOMIC_SET(p, v) atomic_set(p, v) -#define NM_ATOMIC_INC(p) atomic_inc(p) -#define NM_ATOMIC_READ_AND_CLEAR(p) atomic_xchg(p, 0) -#define NM_ATOMIC_READ(p) atomic_read(p) - - -// XXX maybe implement it as a proper function somewhere -// it is important to set s->len before the copy. 
-#define m_devget(_buf, _len, _ofs, _dev, _fn) ( { \ - struct sk_buff *s = netdev_alloc_skb(_dev, _len); \ - if (s) { \ - skb_put(s, _len); \ - skb_copy_to_linear_data_offset(s, _ofs, _buf, _len); \ - s->protocol = eth_type_trans(s, _dev); \ - } \ - s; } ) - -#define mbuf sk_buff -#define m_nextpkt next // chain of mbufs -#define m_freem(m) dev_kfree_skb_any(m) // free a sk_buff - -#define GET_MBUF_REFCNT(m) NM_ATOMIC_READ(&((m)->users)) -#define netmap_get_mbuf(size) alloc_skb(size, GFP_ATOMIC) -/* - * on tx we force skb->queue_mapping = ring_nr, - * but on rx it is the driver that sets the value, - * and it is 0 for no setting, ring_nr+1 otherwise. - */ -#define MBUF_TXQ(m) skb_get_queue_mapping(m) -#define MBUF_RXQ(m) (skb_rx_queue_recorded(m) ? skb_get_rx_queue(m) : 0) -#define SET_MBUF_DESTRUCTOR(m, f) m->destructor = (void *)&f - -/* Magic number for sk_buff.priority field, used to take decisions in - * generic_ndo_start_xmit() and in linux_generic_rx_handler(). - */ -#define NM_MAGIC_PRIORITY_TX 0xad86d310U -#define NM_MAGIC_PRIORITY_RX 0xad86d311U - -/* - * m_copydata() copies from mbuf to buffer following the mbuf chain. - * skb_copy_bits() copies the skb headlen and all the fragments. - */ - -#define m_copydata(m, o, l, b) skb_copy_bits(m, o, b, l) - -#define copyin(_from, _to, _len) copy_from_user(_to, _from, _len) - -/* - * struct ifnet is remapped into struct net_device on linux. - * ifnet has an if_softc field pointing to the device-specific struct - * (adapter). - * On linux the ifnet/net_device is at the beginning of the device-specific - * structure, so a pointer to the first field of the ifnet works. - * We don't use this in netmap, though. - * - * if_xname name device name - * if_capenable priv_flags - * we would use "features" but it is all taken. - * XXX check for conflict in flags use. - * - * In netmap we use if_pspare[0] to point to the netmap_adapter, - * in linux we have no spares so we overload ax25_ptr, and the detection - * for netmap-capable is some magic in the area pointed by that. - */ -#define WNA(_ifp) (_ifp)->ax25_ptr - -#define ifnet net_device /* remap */ -#define if_xname name /* field ifnet-> net_device */ -#define if_capenable priv_flags /* IFCAP_NETMAP */ - -/* some other FreeBSD APIs */ -struct net_device* ifunit_ref(const char *name); -void if_rele(struct net_device *ifp); - -/* hook to send from user space */ -netdev_tx_t linux_netmap_start_xmit(struct sk_buff *, struct net_device *); - -/* prevent ring params change while in netmap mode */ -int linux_netmap_set_ringparam(struct net_device *, struct ethtool_ringparam *); -#ifdef ETHTOOL_SCHANNELS -int linux_netmap_set_channels(struct net_device *, struct ethtool_channels *); -#endif - -#define CURVNET_SET(x) -#define CURVNET_RESTORE(x) - -#define refcount_acquire(_a) atomic_add(1, (atomic_t *)_a) -#define refcount_release(_a) atomic_dec_and_test((atomic_t *)_a) - - -/* - * We use spin_lock_irqsave() because we use the lock in the - * (hard) interrupt context. - */ -typedef struct { - spinlock_t sl; - ulong flags; -} safe_spinlock_t; - -static inline void mtx_lock(safe_spinlock_t *m) -{ - spin_lock_irqsave(&(m->sl), m->flags); -} - -static inline void mtx_unlock(safe_spinlock_t *m) -{ - ulong flags = ACCESS_ONCE(m->flags); - spin_unlock_irqrestore(&(m->sl), flags); -} - -#define mtx_init(a, b, c, d) spin_lock_init(&((a)->sl)) -#define mtx_destroy(a) // XXX spin_lock_destroy(a) - -/* - * XXX these must be changed, as we cannot sleep within the RCU. 
- * Must change to proper rwlock, and then can move the definitions - * into the main netmap.c file. - */ -#define BDG_RWLOCK_T struct rw_semaphore -#define BDG_RWINIT(b) init_rwsem(&(b)->bdg_lock) -#define BDG_WLOCK(b) down_write(&(b)->bdg_lock) -#define BDG_WUNLOCK(b) up_write(&(b)->bdg_lock) -#define BDG_RLOCK(b) down_read(&(b)->bdg_lock) -#define BDG_RUNLOCK(b) up_read(&(b)->bdg_lock) -#define BDG_RTRYLOCK(b) down_read_trylock(&(b)->bdg_lock) -#define BDG_SET_VAR(lval, p) ((lval) = (p)) -#define BDG_GET_VAR(lval) (lval) -#define BDG_FREE(p) kfree(p) - -/* use volatile to fix a probable compiler error on 2.6.25 */ -#define malloc(_size, type, flags) \ - ({ volatile int _v = _size; kmalloc(_v, GFP_ATOMIC | __GFP_ZERO); }) - -#define free(a, t) kfree(a) - -// XXX do we need GPF_ZERO ? -// XXX do we need GFP_DMA for slots ? -// http://www.mjmwired.net/kernel/Documentation/DMA-API.txt - -#ifndef ilog2 /* not in 2.6.18 */ -static inline int ilog2(uint64_t n) -{ - uint64_t k = 1ULL<<63; - int i; - for (i = 63; i >= 0 && !(n &k); i--, k >>=1) - ; - return i; -} -#endif /* ilog2 */ - -#define contigmalloc(sz, ty, flags, a, b, pgsz, c) \ - (char *) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, \ - ilog2(roundup_pow_of_two((sz)/PAGE_SIZE))) -#define contigfree(va, sz, ty) free_pages((unsigned long)va, \ - ilog2(roundup_pow_of_two(sz)/PAGE_SIZE)) - -#define vtophys virt_to_phys - -/*--- selrecord and friends ---*/ -/* wake_up() or wake_up_interruptible() ? */ -#define OS_selwakeup(sw, pri) wake_up(sw) -#define selrecord(x, y) poll_wait((struct file *)x, y, pwait) - -// #define knlist_destroy(x) // XXX todo - -#define tsleep(a, b, c, t) msleep(10) -// #define wakeup(sw) // XXX double check - -#define microtime do_gettimeofday // debugging - - -/* - * The following trick is to map a struct cdev into a struct miscdevice - * On FreeBSD cdev and cdevsw are two different objects. - */ -#define cdev miscdevice -#define cdevsw miscdevice - - -/* - * XXX to complete - the dmamap interface - */ -#define BUS_DMA_NOWAIT 0 -#define bus_dmamap_load(_1, _2, _3, _4, _5, _6, _7) -#define bus_dmamap_unload(_1, _2) - -typedef int (d_mmap_t)(struct file *f, struct vm_area_struct *vma); -typedef unsigned int (d_poll_t)(struct file * file, struct poll_table_struct *pwait); - -/* - * make_dev will set an error and return the first argument. - * This relies on the availability of the 'error' local variable. - * For old linux systems that do not have devfs, generate a - * message in syslog so the sysadmin knows which command to run - * in order to create the /dev/netmap entry - */ -#define make_dev(_cdev, _zero, _uid, _gid, _perm, _name) \ - ({error = misc_register(_cdev); \ - D("run mknod /dev/%s c %d %d # error %d", \ - (_cdev)->name, MISC_MAJOR, (_cdev)->minor, error); \ - _cdev; } ) -#define destroy_dev(_cdev) misc_deregister(_cdev) - -/*--- sysctl API ----*/ -/* - * linux: sysctl are mapped into /sys/module/ipfw_mod parameters - * windows: they are emulated via get/setsockopt - */ -#define CTLFLAG_RD 1 -#define CTLFLAG_RW 2 - -struct sysctl_oid; -struct sysctl_req; - - -#define SYSCTL_DECL(_1) -#define SYSCTL_OID(_1, _2, _3, _4, _5, _6, _7, _8) -#define SYSCTL_NODE(_1, _2, _3, _4, _5, _6) -#define _SYSCTL_BASE(_name, _var, _ty, _perm) \ - module_param_named(_name, *(_var), _ty, \ - ( (_perm) == CTLFLAG_RD) ? 
0444: 0644 ) - -/* XXX should implement this */ -extern struct kernel_param_ops generic_sysctl_ops; - -#define SYSCTL_PROC(_base, _oid, _name, _mode, _var, _val, _fn, _ty, _desc) \ - module_param_cb(_name, &generic_sysctl_ops, _fn, \ - ( (_mode) & CTLFLAG_WR) ? 0644: 0444 ) - - -/* for a string, _var is a preallocated buffer of size _varlen */ -#define SYSCTL_STRING(_base, _oid, _name, _mode, _var, _varlen, _desc) \ - module_param_string(_name, _var, _varlen, \ - ((_mode) == CTLFLAG_RD) ? 0444: 0644 ) - -#define SYSCTL_INT(_base, _oid, _name, _mode, _var, _val, _desc) \ - _SYSCTL_BASE(_name, _var, int, _mode) - -#define SYSCTL_LONG(_base, _oid, _name, _mode, _var, _val, _desc) \ - _SYSCTL_BASE(_name, _var, long, _mode) - -#define SYSCTL_ULONG(_base, _oid, _name, _mode, _var, _val, _desc) \ - _SYSCTL_BASE(_name, _var, ulong, _mode) - -#define SYSCTL_UINT(_base, _oid, _name, _mode, _var, _val, _desc) \ - _SYSCTL_BASE(_name, _var, uint, _mode) - -// #define TUNABLE_INT(_name, _ptr) - -#define SYSCTL_VNET_PROC SYSCTL_PROC -#define SYSCTL_VNET_INT SYSCTL_INT - -#define SYSCTL_HANDLER_ARGS \ - struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req -int sysctl_handle_int(SYSCTL_HANDLER_ARGS); -int sysctl_handle_long(SYSCTL_HANDLER_ARGS); - -#define MALLOC_DECLARE(a) -#define MALLOC_DEFINE(a, b, c) - -#define devfs_get_cdevpriv(pp) \ - ({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; \ - (*pp ? 0 : ENOENT); }) - -/* devfs_set_cdevpriv cannot fail on linux */ -#define devfs_set_cdevpriv(p, fn) \ - ({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); }) - - -#define devfs_clear_cdevpriv() do { \ - netmap_dtor(priv); ((struct file *)td)->private_data = 0; \ - } while (0) - -#endif /* _BSD_GLUE_H */ diff --git a/netmap/LINUX/final-patches/diff--e1000--20620--99999 b/netmap/LINUX/final-patches/diff--e1000--20620--99999 deleted file mode 100644 index b719da7..0000000 --- a/netmap/LINUX/final-patches/diff--e1000--20620--99999 +++ /dev/null @@ -1,104 +0,0 @@ -diff --git a/e1000/e1000_main.c b/e1000/e1000_main.c -index bcd192c..5de7009 100644 ---- a/e1000/e1000_main.c -+++ b/e1000/e1000_main.c -@@ -213,6 +213,10 @@ static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE; - module_param(debug, int, 0); - MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - /** - * e1000_init_module - Driver Registration Routine - * -@@ -375,6 +379,10 @@ static void e1000_configure(struct e1000_adapter *adapter) - e1000_configure_tx(adapter); - e1000_setup_rctl(adapter); - e1000_configure_rx(adapter); -+#ifdef DEV_NETMAP -+ if (e1000_netmap_init_buffers(adapter)) -+ return; -+#endif /* DEV_NETMAP */ - /* call E1000_DESC_UNUSED which always leaves - * at least 1 descriptor unused to make sure - * next_to_use != next_to_clean */ -@@ -402,6 +410,10 @@ int e1000_up(struct e1000_adapter *adapter) - - netif_wake_queue(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif /* DEV_NETMAP */ -+ - /* fire a link change interrupt to start the watchdog */ - ew32(ICS, E1000_ICS_LSC); - return 0; -@@ -485,6 +497,10 @@ void e1000_down(struct e1000_adapter *adapter) - ew32(RCTL, rctl & ~E1000_RCTL_EN); - /* flush and sleep below */ - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - netif_tx_disable(netdev); - - /* disable transmits in the hardware */ -@@ -1035,6 +1051,10 @@ static int __devinit e1000_probe(struct pci_dev *pdev, - 
adapter->wol = adapter->eeprom_wol; - device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); - -+#ifdef DEV_NETMAP -+ e1000_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - /* print bus type/speed/width info */ - DPRINTK(PROBE, INFO, "(PCI%s:%s:%s) ", - ((hw->bus_type == e1000_bus_type_pcix) ? "-X" : ""), -@@ -1113,6 +1133,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev) - - kfree(adapter->tx_ring); - kfree(adapter->rx_ring); -+ -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ - - iounmap(hw->hw_addr); - if (hw->flash_address) -@@ -1291,6 +1315,10 @@ static int e1000_open(struct net_device *netdev) - - netif_start_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif -+ - /* fire a link status change interrupt to start the watchdog */ - ew32(ICS, E1000_ICS_LSC); - -@@ -3429,6 +3457,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, - unsigned int count = 0; - unsigned int total_tx_bytes=0, total_tx_packets=0; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(netdev, 0)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = E1000_TX_DESC(*tx_ring, eop); -@@ -3795,6 +3827,11 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, - bool cleaned = false; - unsigned int total_rx_bytes=0, total_rx_packets=0; - -+#ifdef DEV_NETMAP -+ ND("calling netmap_rx_irq"); -+ if (netmap_rx_irq(netdev, 0, work_done)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = E1000_RX_DESC(*rx_ring, i); - buffer_info = &rx_ring->buffer_info[i]; diff --git a/netmap/LINUX/final-patches/diff--e1000e--20620--20623 b/netmap/LINUX/final-patches/diff--e1000e--20620--20623 deleted file mode 100644 index 4c7ebf0..0000000 --- a/netmap/LINUX/final-patches/diff--e1000e--20620--20623 +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/e1000e/netdev.c b/e1000e/netdev.c -index fad8f9e..50f74e2 100644 ---- a/e1000e/netdev.c -+++ b/e1000e/netdev.c -@@ -87,6 +87,10 @@ static int e1000_desc_unused(struct e1000_ring *ring) - return ring->count + ring->next_to_clean - ring->next_to_use - 1; - } - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - /** - * e1000_receive_skb - helper function to handle Rx indications - * @adapter: board private structure -@@ -446,6 +450,10 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, - bool cleaned = 0; - unsigned int total_rx_bytes = 0, total_rx_packets = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(netdev, 0, work_done)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = E1000_RX_DESC(*rx_ring, i); - buffer_info = &rx_ring->buffer_info[i]; -@@ -624,6 +632,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter) - unsigned int count = 0; - unsigned int total_tx_bytes = 0, total_tx_packets = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(netdev, 0)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = E1000_TX_DESC(*tx_ring, eop); -@@ -2632,6 +2644,10 @@ static void e1000_configure(struct e1000_adapter *adapter) - e1000_configure_tx(adapter); - e1000_setup_rctl(adapter); - e1000_configure_rx(adapter); -+#ifdef DEV_NETMAP -+ if (e1000e_netmap_init_buffers(adapter)) -+ return; -+#endif /* DEV_NETMAP */ - adapter->alloc_rx_buf(adapter, 
e1000_desc_unused(adapter->rx_ring)); - } - -@@ -2892,6 +2908,10 @@ void e1000e_down(struct e1000_adapter *adapter) - - netif_stop_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - /* disable transmits in the hardware */ - tctl = er32(TCTL); - tctl &= ~E1000_TCTL_EN; -@@ -3174,6 +3194,10 @@ static int e1000_open(struct net_device *netdev) - - netif_start_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - /* fire a link status change interrupt to start the watchdog */ - ew32(ICS, E1000_ICS_LSC); - -@@ -5227,6 +5251,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev, - if (err) - goto err_register; - -+#ifdef DEV_NETMAP -+ e1000_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -@@ -5300,6 +5327,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev) - kfree(adapter->tx_ring); - kfree(adapter->rx_ring); - -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - iounmap(adapter->hw.hw_addr); - if (adapter->hw.flash_address) - iounmap(adapter->hw.flash_address); diff --git a/netmap/LINUX/final-patches/diff--e1000e--20623--30100 b/netmap/LINUX/final-patches/diff--e1000e--20623--30100 deleted file mode 100644 index e7754de..0000000 --- a/netmap/LINUX/final-patches/diff--e1000e--20623--30100 +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/e1000e/netdev.c b/e1000e/netdev.c -index 57a7e41..d8bc988 100644 ---- a/e1000e/netdev.c -+++ b/e1000e/netdev.c -@@ -435,6 +435,10 @@ static int e1000_desc_unused(struct e1000_ring *ring) - return ring->count + ring->next_to_clean - ring->next_to_use - 1; - } - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - /** - * e1000_receive_skb - helper function to handle Rx indications - * @adapter: board private structure -@@ -763,6 +767,10 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, - bool cleaned = 0; - unsigned int total_rx_bytes = 0, total_rx_packets = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(netdev, 0, work_done)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = E1000_RX_DESC(*rx_ring, i); - buffer_info = &rx_ring->buffer_info[i]; -@@ -977,6 +985,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter) - unsigned int count = 0; - unsigned int total_tx_bytes = 0, total_tx_packets = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(netdev, 0)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = E1000_TX_DESC(*tx_ring, eop); -@@ -3001,6 +3013,10 @@ static void e1000_configure(struct e1000_adapter *adapter) - e1000_configure_tx(adapter); - e1000_setup_rctl(adapter); - e1000_configure_rx(adapter); -+#ifdef DEV_NETMAP -+ if (e1000e_netmap_init_buffers(adapter)) -+ return; -+#endif /* DEV_NETMAP */ - adapter->alloc_rx_buf(adapter, e1000_desc_unused(adapter->rx_ring)); - } - -@@ -3240,6 +3256,10 @@ void e1000e_down(struct e1000_adapter *adapter) - - netif_stop_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - /* disable transmits in the hardware */ - tctl = er32(TCTL); - tctl &= ~E1000_TCTL_EN; -@@ -3532,6 +3552,10 @@ static int e1000_open(struct net_device *netdev) - - netif_start_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - 
adapter->idle_check = true; - pm_runtime_put(&pdev->dev); - -@@ -5716,6 +5740,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev, - if (err) - goto err_register; - -+#ifdef DEV_NETMAP -+ e1000_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -@@ -5813,6 +5840,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev) - kfree(adapter->tx_ring); - kfree(adapter->rx_ring); - -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - iounmap(adapter->hw.hw_addr); - if (adapter->hw.flash_address) - iounmap(adapter->hw.flash_address); diff --git a/netmap/LINUX/final-patches/diff--e1000e--30100--30400 b/netmap/LINUX/final-patches/diff--e1000e--30100--30400 deleted file mode 100644 index 5032d88..0000000 --- a/netmap/LINUX/final-patches/diff--e1000e--30100--30400 +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/e1000e/netdev.c b/e1000e/netdev.c -index 2198e61..caf2767 100644 ---- a/e1000e/netdev.c -+++ b/e1000e/netdev.c -@@ -452,6 +452,10 @@ static int e1000_desc_unused(struct e1000_ring *ring) - return ring->count + ring->next_to_clean - ring->next_to_use - 1; - } - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - /** - * e1000_receive_skb - helper function to handle Rx indications - * @adapter: board private structure -@@ -849,6 +853,10 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, - bool cleaned = 0; - unsigned int total_rx_bytes = 0, total_rx_packets = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(netdev, 0, work_done)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = E1000_RX_DESC(*rx_ring, i); - buffer_info = &rx_ring->buffer_info[i]; -@@ -1066,6 +1074,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter) - unsigned int count = 0; - unsigned int total_tx_bytes = 0, total_tx_packets = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(netdev, 0)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = E1000_TX_DESC(*tx_ring, eop); -@@ -3177,6 +3189,10 @@ static void e1000_configure(struct e1000_adapter *adapter) - e1000_configure_tx(adapter); - e1000_setup_rctl(adapter); - e1000_configure_rx(adapter); -+#ifdef DEV_NETMAP -+ if (e1000e_netmap_init_buffers(adapter)) -+ return; -+#endif /* DEV_NETMAP */ - adapter->alloc_rx_buf(adapter, e1000_desc_unused(adapter->rx_ring), - GFP_KERNEL); - } -@@ -3468,6 +3484,10 @@ void e1000e_down(struct e1000_adapter *adapter) - - netif_stop_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - /* disable transmits in the hardware */ - tctl = er32(TCTL); - tctl &= ~E1000_TCTL_EN; -@@ -3755,6 +3775,10 @@ static int e1000_open(struct net_device *netdev) - - netif_start_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - adapter->idle_check = true; - pm_runtime_put(&pdev->dev); - -@@ -6147,6 +6171,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev, - if (err) - goto err_register; - -+#ifdef DEV_NETMAP -+ e1000_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -@@ -6234,6 +6261,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev) - kfree(adapter->tx_ring); - kfree(adapter->rx_ring); - -+#ifdef DEV_NETMAP -+ 
netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - iounmap(adapter->hw.hw_addr); - if (adapter->hw.flash_address) - iounmap(adapter->hw.flash_address); diff --git a/netmap/LINUX/final-patches/diff--e1000e--30400--30900 b/netmap/LINUX/final-patches/diff--e1000e--30400--30900 deleted file mode 100644 index 0c6de76..0000000 --- a/netmap/LINUX/final-patches/diff--e1000e--30400--30900 +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/e1000e/netdev.c b/e1000e/netdev.c -index 9520a6a..f6f2df6 100644 ---- a/e1000e/netdev.c -+++ b/e1000e/netdev.c -@@ -467,6 +467,10 @@ static int e1000_desc_unused(struct e1000_ring *ring) - return ring->count + ring->next_to_clean - ring->next_to_use - 1; - } - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - /** - * e1000_receive_skb - helper function to handle Rx indications - * @adapter: board private structure -@@ -875,6 +879,10 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done, - bool cleaned = false; - unsigned int total_rx_bytes = 0, total_rx_packets = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(netdev, 0, work_done)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); -@@ -1129,6 +1137,10 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring) - unsigned int total_tx_bytes = 0, total_tx_packets = 0; - unsigned int bytes_compl = 0, pkts_compl = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(netdev, 0)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = E1000_TX_DESC(*tx_ring, eop); -@@ -3358,6 +3370,10 @@ static void e1000_configure(struct e1000_adapter *adapter) - e1000e_setup_rss_hash(adapter); - e1000_setup_rctl(adapter); - e1000_configure_rx(adapter); -+#ifdef DEV_NETMAP -+ if (e1000e_netmap_init_buffers(adapter)) -+ return; -+#endif /* DEV_NETMAP */ - adapter->alloc_rx_buf(rx_ring, e1000_desc_unused(rx_ring), GFP_KERNEL); - } - -@@ -3657,6 +3673,10 @@ void e1000e_down(struct e1000_adapter *adapter) - - netif_stop_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - /* disable transmits in the hardware */ - tctl = er32(TCTL); - tctl &= ~E1000_TCTL_EN; -@@ -3946,6 +3966,10 @@ static int e1000_open(struct net_device *netdev) - adapter->tx_hang_recheck = false; - netif_start_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - adapter->idle_check = true; - pm_runtime_put(&pdev->dev); - -@@ -6417,6 +6441,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev, - if (err) - goto err_register; - -+#ifdef DEV_NETMAP -+ e1000_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -@@ -6504,6 +6531,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev) - kfree(adapter->tx_ring); - kfree(adapter->rx_ring); - -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - iounmap(adapter->hw.hw_addr); - if (adapter->hw.flash_address) - iounmap(adapter->hw.flash_address); diff --git a/netmap/LINUX/final-patches/diff--e1000e--30900--99999 b/netmap/LINUX/final-patches/diff--e1000e--30900--99999 deleted file mode 100644 index 3156ba3..0000000 --- a/netmap/LINUX/final-patches/diff--e1000e--30900--99999 +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/e1000e/netdev.c 
b/e1000e/netdev.c -index 7e615e2..f9d8a88 100644 ---- a/e1000e/netdev.c -+++ b/e1000e/netdev.c -@@ -473,6 +473,10 @@ static int e1000_desc_unused(struct e1000_ring *ring) - return ring->count + ring->next_to_clean - ring->next_to_use - 1; - } - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - /** - * e1000e_systim_to_hwtstamp - convert system time value to hw time stamp - * @adapter: board private structure -@@ -914,6 +918,10 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done, - bool cleaned = false; - unsigned int total_rx_bytes = 0, total_rx_packets = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(netdev, 0, work_done)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); -@@ -1203,6 +1211,10 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring) - unsigned int total_tx_bytes = 0, total_tx_packets = 0; - unsigned int bytes_compl = 0, pkts_compl = 0; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(netdev, 0)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = E1000_TX_DESC(*tx_ring, eop); -@@ -3685,6 +3697,10 @@ static void e1000_configure(struct e1000_adapter *adapter) - e1000e_setup_rss_hash(adapter); - e1000_setup_rctl(adapter); - e1000_configure_rx(adapter); -+#ifdef DEV_NETMAP -+ if (e1000e_netmap_init_buffers(adapter)) -+ return; -+#endif /* DEV_NETMAP */ - adapter->alloc_rx_buf(rx_ring, e1000_desc_unused(rx_ring), GFP_KERNEL); - } - -@@ -3988,6 +4004,10 @@ void e1000e_down(struct e1000_adapter *adapter) - - netif_stop_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - /* disable transmits in the hardware */ - tctl = er32(TCTL); - tctl &= ~E1000_TCTL_EN; -@@ -4307,6 +4327,10 @@ static int e1000_open(struct net_device *netdev) - adapter->tx_hang_recheck = false; - netif_start_queue(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - adapter->idle_check = true; - hw->mac.get_link_status = true; - pm_runtime_put(&pdev->dev); -@@ -6768,6 +6792,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) - if (err) - goto err_register; - -+#ifdef DEV_NETMAP -+ e1000_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -@@ -6866,6 +6893,10 @@ static void e1000_remove(struct pci_dev *pdev) - kfree(adapter->tx_ring); - kfree(adapter->rx_ring); - -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - iounmap(adapter->hw.hw_addr); - if (adapter->hw.flash_address) - iounmap(adapter->hw.flash_address); diff --git a/netmap/LINUX/final-patches/diff--forcedeth.c--20626--99999 b/netmap/LINUX/final-patches/diff--forcedeth.c--20626--99999 deleted file mode 100644 index e9723a2..0000000 --- a/netmap/LINUX/final-patches/diff--forcedeth.c--20626--99999 +++ /dev/null @@ -1,76 +0,0 @@ -diff --git a/forcedeth.c b/forcedeth.c -index 9c0b1ba..b081d6b 100644 ---- a/forcedeth.c -+++ b/forcedeth.c -@@ -1865,12 +1865,25 @@ static void nv_init_tx(struct net_device *dev) - } - } - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* we need a few forward declarations */ -+static void nv_drain_rxtx(struct net_device *dev); -+static int nv_init_ring(struct net_device *dev); 
-+#include -+#endif -+ - static int nv_init_ring(struct net_device *dev) - { - struct fe_priv *np = netdev_priv(dev); - - nv_init_tx(dev); - nv_init_rx(dev); -+#ifdef DEV_NETMAP -+ forcedeth_netmap_tx_init(np); -+ if (forcedeth_netmap_rx_init(np)) -+ return 0; /* success */ -+#endif /* DEV_NETMAP */ -+ - - if (!nv_optimized(np)) - return nv_alloc_rx(dev); -@@ -3386,6 +3399,11 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data) - int i; - unsigned long flags; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(dev, 0)) -+ return IRQ_HANDLED; -+#endif /* DEV_NETMAP */ -+ - for (i = 0;; i++) { - events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_TX_ALL; - writel(NVREG_IRQ_TX_ALL, base + NvRegMSIXIrqStatus); -@@ -3497,6 +3515,11 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data) - int i; - unsigned long flags; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(dev, 0, &i)) -+ return IRQ_HANDLED; -+#endif /* DEV_NETMAP */ -+ - for (i = 0;; i++) { - events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_RX_ALL; - writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus); -@@ -5645,6 +5668,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i - goto out_error; - } - -+#ifdef DEV_NETMAP -+ forcedeth_netmap_attach(np); -+#endif /* DEV_NETMAP */ -+ - netif_carrier_off(dev); - - dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n", -@@ -5728,6 +5755,10 @@ static void __devexit nv_remove(struct pci_dev *pci_dev) - - unregister_netdev(dev); - -+#ifdef DEV_NETMAP -+ netmap_detach(dev); -+#endif /* DEV_NETMAP */ -+ - nv_restore_mac_addr(pci_dev); - - /* restore any phy related changes */ diff --git a/netmap/LINUX/final-patches/diff--igb--20621--20623 b/netmap/LINUX/final-patches/diff--igb--20621--20623 deleted file mode 100644 index 9450a34..0000000 --- a/netmap/LINUX/final-patches/diff--igb--20621--20623 +++ /dev/null @@ -1,37 +0,0 @@ -diff --git a/igb/igb_main.c b/igb/igb_main.c -index c881347..77b3fda 100644 ---- a/igb/igb_main.c -+++ b/igb/igb_main.c -@@ -1144,6 +1144,10 @@ int igb_up(struct igb_adapter *adapter) - - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. */ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); -@@ -1167,6 +1171,10 @@ void igb_down(struct igb_adapter *adapter) - wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN); - /* flush and sleep below */ - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - netif_tx_stop_all_queues(netdev); - - /* disable transmits in the hardware */ -@@ -2018,6 +2026,10 @@ static int igb_open(struct net_device *netdev) - - netif_tx_start_all_queues(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. 
*/ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); diff --git a/netmap/LINUX/final-patches/diff--igb--20623--30200 b/netmap/LINUX/final-patches/diff--igb--20623--30200 deleted file mode 100644 index d9236b8..0000000 --- a/netmap/LINUX/final-patches/diff--igb--20623--30200 +++ /dev/null @@ -1,115 +0,0 @@ -diff --git a/igb/igb_main.c b/igb/igb_main.c -index cea37e0..70777e4 100644 ---- a/igb/igb_main.c -+++ b/igb/igb_main.c -@@ -201,6 +201,10 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); - MODULE_LICENSE("GPL"); - MODULE_VERSION(DRV_VERSION); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct igb_reg_info { - u32 ofs; - char *name; -@@ -1478,6 +1482,10 @@ int igb_up(struct igb_adapter *adapter) - - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. */ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); -@@ -1501,6 +1509,10 @@ void igb_down(struct igb_adapter *adapter) - wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN); - /* flush and sleep below */ - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - netif_tx_stop_all_queues(netdev); - - /* disable transmits in the hardware */ -@@ -1963,6 +1975,10 @@ static int __devinit igb_probe(struct pci_dev *pdev, - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -+#ifdef DEV_NETMAP -+ igb_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - #ifdef CONFIG_IGB_DCA - if (dca_add_requester(&pdev->dev) == 0) { - adapter->flags |= IGB_FLAG_DCA_ENABLED; -@@ -2072,6 +2088,10 @@ static void __devexit igb_remove(struct pci_dev *pdev) - dev_info(&pdev->dev, "IOV Disabled\n"); - } - #endif -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - - iounmap(hw->hw_addr); - if (hw->flash_address) -@@ -2366,6 +2386,10 @@ static int igb_open(struct net_device *netdev) - - netif_tx_start_all_queues(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. 
*/ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); -@@ -2545,6 +2569,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, - - txdctl |= E1000_TXDCTL_QUEUE_ENABLE; - wr32(E1000_TXDCTL(reg_idx), txdctl); -+#ifdef DEV_NETMAP -+ igb_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - /** -@@ -5338,6 +5365,11 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) - unsigned int i, eop, count = 0; - bool cleaned = false; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(netdev, tx_ring->queue_index)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ -+ - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop); -@@ -5540,6 +5572,11 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector, - u16 length; - u16 vlan_tag; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(netdev, rx_ring->queue_index, work_done)) -+ return 1; -+#endif /* DEV_NETMAP */ -+ - i = rx_ring->next_to_clean; - buffer_info = &rx_ring->buffer_info[i]; - rx_desc = E1000_RX_DESC_ADV(*rx_ring, i); -@@ -5668,6 +5705,10 @@ void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count) - unsigned int i; - int bufsz; - -+#ifdef DEV_NETMAP -+ if (igb_netmap_configure_rx_ring(rx_ring)) -+ return; -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_use; - buffer_info = &rx_ring->buffer_info[i]; - diff --git a/netmap/LINUX/final-patches/diff--igb--30200--30300 b/netmap/LINUX/final-patches/diff--igb--30200--30300 deleted file mode 100644 index b6736ef..0000000 --- a/netmap/LINUX/final-patches/diff--igb--30200--30300 +++ /dev/null @@ -1,136 +0,0 @@ -diff --git a/igb/igb_main.c b/igb/igb_main.c -index ced5444..fb7c766 100644 ---- a/igb/igb_main.c -+++ b/igb/igb_main.c -@@ -225,6 +225,10 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); - MODULE_LICENSE("GPL"); - MODULE_VERSION(DRV_VERSION); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct igb_reg_info { - u32 ofs; - char *name; -@@ -1551,6 +1555,10 @@ int igb_up(struct igb_adapter *adapter) - - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. */ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); -@@ -1584,6 +1592,10 @@ void igb_down(struct igb_adapter *adapter) - wrfl(); - msleep(10); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - for (i = 0; i < adapter->num_q_vectors; i++) - napi_disable(&(adapter->q_vector[i]->napi)); - -@@ -2073,6 +2085,10 @@ static int __devinit igb_probe(struct pci_dev *pdev, - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -+#ifdef DEV_NETMAP -+ igb_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - #ifdef CONFIG_IGB_DCA - if (dca_add_requester(&pdev->dev) == 0) { - adapter->flags |= IGB_FLAG_DCA_ENABLED; -@@ -2199,6 +2215,10 @@ static void __devexit igb_remove(struct pci_dev *pdev) - dev_info(&pdev->dev, "IOV Disabled\n"); - } - #endif -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - - iounmap(hw->hw_addr); - if (hw->flash_address) -@@ -2529,6 +2549,10 @@ static int igb_open(struct net_device *netdev) - - netif_tx_start_all_queues(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. 
*/ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); -@@ -2711,6 +2735,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, - - txdctl |= E1000_TXDCTL_QUEUE_ENABLE; - wr32(E1000_TXDCTL(reg_idx), txdctl); -+#ifdef DEV_NETMAP -+ igb_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - /** -@@ -3088,6 +3115,19 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, - /* Only set Drop Enable if we are supporting multiple queues */ - if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) - srrctl |= E1000_SRRCTL_DROP_EN; -+#ifdef DEV_NETMAP -+ { -+ /* The driver uses split buffers, which are not -+ * supported in netmap mode */ -+ struct ifnet *ifp = adapter->netdev; -+ struct netmap_adapter *na = NA(ifp); -+ if (na && ifp->if_capenable & IFCAP_NETMAP) { -+ srrctl &= ~(7 << 25); /* clear descriptor type */ -+ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; -+ /* XXX we should set tail here */ -+ } -+ } -+#endif - - wr32(E1000_SRRCTL(reg_idx), srrctl); - -@@ -5705,6 +5745,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) - - if (test_bit(__IGB_DOWN, &adapter->state)) - return true; -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(tx_ring->netdev, tx_ring->queue_index)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IGB_TX_DESC(tx_ring, i); -@@ -5980,6 +6024,12 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) - u16 cleaned_count = igb_desc_unused(rx_ring); - u16 i = rx_ring->next_to_clean; - -+#ifdef DEV_NETMAP -+ int dummy = 1; // select rx irq handling -+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy)) -+ return 1; -+#endif /* DEV_NETMAP */ -+ - rx_desc = IGB_RX_DESC(rx_ring, i); - - while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { -@@ -6170,6 +6220,11 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) - struct igb_rx_buffer *bi; - u16 i = rx_ring->next_to_use; - -+#ifdef DEV_NETMAP -+ if (igb_netmap_configure_rx_ring(rx_ring)) -+ return; -+#endif /* DEV_NETMAP */ -+ - rx_desc = IGB_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - i -= rx_ring->count; diff --git a/netmap/LINUX/final-patches/diff--igb--30300--30800 b/netmap/LINUX/final-patches/diff--igb--30300--30800 deleted file mode 100644 index a5b0f10..0000000 --- a/netmap/LINUX/final-patches/diff--igb--30300--30800 +++ /dev/null @@ -1,136 +0,0 @@ -diff --git a/igb/igb_main.c b/igb/igb_main.c -index 94be6c3..294051b 100644 ---- a/igb/igb_main.c -+++ b/igb/igb_main.c -@@ -236,6 +236,10 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); - MODULE_LICENSE("GPL"); - MODULE_VERSION(DRV_VERSION); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct igb_reg_info { - u32 ofs; - char *name; -@@ -1557,6 +1561,10 @@ int igb_up(struct igb_adapter *adapter) - - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. 
*/ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); -@@ -1590,6 +1598,10 @@ void igb_down(struct igb_adapter *adapter) - wrfl(); - msleep(10); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - for (i = 0; i < adapter->num_q_vectors; i++) - napi_disable(&(adapter->q_vector[i]->napi)); - -@@ -2081,6 +2093,10 @@ static int __devinit igb_probe(struct pci_dev *pdev, - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -+#ifdef DEV_NETMAP -+ igb_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - #ifdef CONFIG_IGB_DCA - if (dca_add_requester(&pdev->dev) == 0) { - adapter->flags |= IGB_FLAG_DCA_ENABLED; -@@ -2211,6 +2227,10 @@ static void __devexit igb_remove(struct pci_dev *pdev) - dev_info(&pdev->dev, "IOV Disabled\n"); - } - #endif -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - - iounmap(hw->hw_addr); - if (hw->flash_address) -@@ -2547,6 +2567,10 @@ static int __igb_open(struct net_device *netdev, bool resuming) - - netif_tx_start_all_queues(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - if (!resuming) - pm_runtime_put(&pdev->dev); - -@@ -2750,6 +2774,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, - - txdctl |= E1000_TXDCTL_QUEUE_ENABLE; - wr32(E1000_TXDCTL(reg_idx), txdctl); -+#ifdef DEV_NETMAP -+ igb_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - /** -@@ -3127,6 +3154,19 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, - /* Only set Drop Enable if we are supporting multiple queues */ - if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) - srrctl |= E1000_SRRCTL_DROP_EN; -+#ifdef DEV_NETMAP -+ { -+ /* The driver uses split buffers, which are not -+ * supported in netmap mode */ -+ struct ifnet *ifp = adapter->netdev; -+ struct netmap_adapter *na = NA(ifp); -+ if (na && ifp->if_capenable & IFCAP_NETMAP) { -+ srrctl &= ~(7 << 25); /* clear descriptor type */ -+ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; -+ /* XXX we should set tail here */ -+ } -+ } -+#endif - - wr32(E1000_SRRCTL(reg_idx), srrctl); - -@@ -5753,6 +5793,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) - - if (test_bit(__IGB_DOWN, &adapter->state)) - return true; -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(tx_ring->netdev, tx_ring->queue_index)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IGB_TX_DESC(tx_ring, i); -@@ -6030,6 +6074,12 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) - u16 cleaned_count = igb_desc_unused(rx_ring); - u16 i = rx_ring->next_to_clean; - -+#ifdef DEV_NETMAP -+ int dummy = 1; // select rx irq handling -+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy)) -+ return 1; -+#endif /* DEV_NETMAP */ -+ - rx_desc = IGB_RX_DESC(rx_ring, i); - - while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { -@@ -6220,6 +6270,11 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) - struct igb_rx_buffer *bi; - u16 i = rx_ring->next_to_use; - -+#ifdef DEV_NETMAP -+ if (igb_netmap_configure_rx_ring(rx_ring)) -+ return; -+#endif /* DEV_NETMAP */ -+ - rx_desc = IGB_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - i -= rx_ring->count; diff --git a/netmap/LINUX/final-patches/diff--igb--30800--30b00 b/netmap/LINUX/final-patches/diff--igb--30800--30b00 deleted file mode 100644 index a457d64..0000000 --- 
a/netmap/LINUX/final-patches/diff--igb--30800--30b00 +++ /dev/null @@ -1,114 +0,0 @@ -diff --git a/igb/igb_main.c b/igb/igb_main.c -index 31cfe2e..8439bc6 100644 ---- a/igb/igb_main.c -+++ b/igb/igb_main.c -@@ -247,6 +247,10 @@ static int debug = -1; - module_param(debug, int, 0); - MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct igb_reg_info { - u32 ofs; - char *name; -@@ -1520,6 +1524,10 @@ int igb_up(struct igb_adapter *adapter) - - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. */ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); -@@ -1553,6 +1561,10 @@ void igb_down(struct igb_adapter *adapter) - wrfl(); - msleep(10); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - for (i = 0; i < adapter->num_q_vectors; i++) - napi_disable(&(adapter->q_vector[i]->napi)); - -@@ -2127,6 +2139,10 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -+#ifdef DEV_NETMAP -+ igb_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - #ifdef CONFIG_IGB_DCA - if (dca_add_requester(&pdev->dev) == 0) { - adapter->flags |= IGB_FLAG_DCA_ENABLED; -@@ -2233,6 +2249,10 @@ static void igb_remove(struct pci_dev *pdev) - wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); - } - #endif -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - - /* Release control of h/w to f/w. If f/w is AMT enabled, this - * would have already happened in close and is redundant. 
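Away from the data path, every igb patch in this series adds the same four lifecycle calls; only the surrounding driver code changes between kernel versions. The condensed map below lists them with the spot each hunk targets; every call appears verbatim in the hunks above.

/* Lifecycle hooks common to all the igb patch versions shown above. */
#ifdef DEV_NETMAP
	igb_netmap_attach(adapter);               /* igb_probe(): after netif_carrier_off(netdev) */
	netmap_enable_all_rings(adapter->netdev); /* igb_up() / igb_open(): after the TX queues start */
	netmap_disable_all_rings(netdev);         /* igb_down(): around the napi_disable() loop */
	netmap_detach(netdev);                    /* igb_remove(): before the device is torn down */
#endif /* DEV_NETMAP */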
*/ -@@ -2553,6 +2573,10 @@ static int __igb_open(struct net_device *netdev, bool resuming) - - netif_tx_start_all_queues(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - if (!resuming) - pm_runtime_put(&pdev->dev); - -@@ -2746,6 +2770,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, - - txdctl |= E1000_TXDCTL_QUEUE_ENABLE; - wr32(E1000_TXDCTL(reg_idx), txdctl); -+#ifdef DEV_NETMAP -+ igb_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - /** -@@ -5690,6 +5717,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) - - if (test_bit(__IGB_DOWN, &adapter->state)) - return true; -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(tx_ring->netdev, tx_ring->queue_index)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IGB_TX_DESC(tx_ring, i); -@@ -6349,6 +6380,10 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) - unsigned int total_bytes = 0, total_packets = 0; - u16 cleaned_count = igb_desc_unused(rx_ring); - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &total_packets)) -+ return true; -+#endif /* DEV_NETMAP */ - do { - union e1000_adv_rx_desc *rx_desc; - -@@ -6461,6 +6496,11 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) - struct igb_rx_buffer *bi; - u16 i = rx_ring->next_to_use; - -+#ifdef DEV_NETMAP -+ if (igb_netmap_configure_rx_ring(rx_ring)) -+ return; -+#endif /* DEV_NETMAP */ -+ - /* nothing to do */ - if (!cleaned_count) - return; diff --git a/netmap/LINUX/final-patches/diff--igb--30b00--99999 b/netmap/LINUX/final-patches/diff--igb--30b00--99999 deleted file mode 100644 index 6b9e4a2..0000000 --- a/netmap/LINUX/final-patches/diff--igb--30b00--99999 +++ /dev/null @@ -1,113 +0,0 @@ -diff --git a/igb/igb_main.c b/igb/igb_main.c -index c1d72c0..9815796 100644 ---- a/igb/igb_main.c -+++ b/igb/igb_main.c -@@ -255,6 +255,10 @@ static int debug = -1; - module_param(debug, int, 0); - MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct igb_reg_info { - u32 ofs; - char *name; -@@ -1633,6 +1637,10 @@ int igb_up(struct igb_adapter *adapter) - - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif /* DEV_NETMAP */ -+ - /* start the watchdog. */ - hw->mac.get_link_status = 1; - schedule_work(&adapter->watchdog_task); -@@ -1674,6 +1682,9 @@ void igb_down(struct igb_adapter *adapter) - napi_disable(&(adapter->q_vector[i]->napi)); - } - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif /* DEV_NETMAP */ - - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); -@@ -2295,6 +2306,10 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) - /* carrier off reporting is important to ethtool even BEFORE open */ - netif_carrier_off(netdev); - -+#ifdef DEV_NETMAP -+ igb_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - #ifdef CONFIG_IGB_DCA - if (dca_add_requester(&pdev->dev) == 0) { - adapter->flags |= IGB_FLAG_DCA_ENABLED; -@@ -2536,6 +2551,10 @@ static void igb_remove(struct pci_dev *pdev) - wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); - } - #endif -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - - /* Release control of h/w to f/w. 
If f/w is AMT enabled, this - * would have already happened in close and is redundant. -@@ -2814,6 +2833,10 @@ static int __igb_open(struct net_device *netdev, bool resuming) - - netif_tx_start_all_queues(netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(netdev); -+#endif /* DEV_NETMAP */ -+ - if (!resuming) - pm_runtime_put(&pdev->dev); - -@@ -3007,6 +3030,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, - - txdctl |= E1000_TXDCTL_QUEUE_ENABLE; - wr32(E1000_TXDCTL(reg_idx), txdctl); -+#ifdef DEV_NETMAP -+ igb_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - /** -@@ -5991,6 +6017,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) - - if (test_bit(__IGB_DOWN, &adapter->state)) - return true; -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(tx_ring->netdev, tx_ring->queue_index)) -+ return 1; /* cleaned ok */ -+#endif /* DEV_NETMAP */ - - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IGB_TX_DESC(tx_ring, i); -@@ -6650,6 +6680,10 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) - unsigned int total_bytes = 0, total_packets = 0; - u16 cleaned_count = igb_desc_unused(rx_ring); - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &total_packets)) -+ return true; -+#endif /* DEV_NETMAP */ - do { - union e1000_adv_rx_desc *rx_desc; - -@@ -6767,6 +6801,11 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) - struct igb_rx_buffer *bi; - u16 i = rx_ring->next_to_use; - -+#ifdef DEV_NETMAP -+ if (igb_netmap_configure_rx_ring(rx_ring)) -+ return; -+#endif /* DEV_NETMAP */ -+ - /* nothing to do */ - if (!cleaned_count) - return; diff --git a/netmap/LINUX/final-patches/diff--ixgbe--20625--20626 b/netmap/LINUX/final-patches/diff--ixgbe--20625--20626 deleted file mode 100644 index f736ab4..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--20625--20626 +++ /dev/null @@ -1,113 +0,0 @@ -diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index eee0b29..70581eb 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -214,6 +214,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. -+ * -+ * contains functions for netmap support -+ * that extend the standard driver. -+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -740,6 +756,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - unsigned int i, eop, count = 0; - unsigned int total_bytes = 0, total_packets = 0; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. 
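The ixgbe patch that starts above documents the convention used by every driver in this series: the per-driver netmap glue header is included only when netmap is configured, and that header defines DEV_NETMAP, which the rest of the driver then tests instead of CONFIG_NETMAP. A sketch of the guard; the header name is an assumption taken from the comment's reference to ixgbe_netmap_linux.h.

/* Inclusion convention used by all the driver patches in this series. */
#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
#include <ixgbe_netmap_linux.h>	/* defines DEV_NETMAP and the *_netmap_* helpers */
#endif

#ifdef DEV_NETMAP
/* netmap-specific hunks are compiled only when the glue header was seen */
#endif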
-+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->tx_buffer_info[i].next_to_watch; - eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop); -@@ -1185,6 +1211,13 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - int ddp_bytes = 0; - #endif /* IXGBE_FCOE */ - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. -+ */ -+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, work_done)) -+ return; -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i); - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); -@@ -2519,6 +2552,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -2833,6 +2869,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(adapter, ring, IXGBE_DESC_UNUSED(ring)); - } - -@@ -3614,6 +3654,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -3863,6 +3907,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - /* Cleanup the affinity_hint CPU mask memory and callback */ - for (i = 0; i < num_q_vectors; i++) { - struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; -@@ -7048,6 +7096,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, - - e_dev_info("Intel(R) 10 Gigabit Network Connection\n"); - cards_found++; -+ -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - return 0; - - err_register: diff --git a/netmap/LINUX/final-patches/diff--ixgbe--20626--30000 b/netmap/LINUX/final-patches/diff--ixgbe--20626--30000 deleted file mode 100644 index cf8c681..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--20626--30000 +++ /dev/null @@ -1,113 +0,0 @@ -diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index 30f9ccf..60c0252 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -221,6 +221,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. -+ * -+ * contains functions for netmap support -+ * that extend the standard driver. 
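Besides the interrupt hooks, the igb and ixgbe patches intercept ring (re)initialisation. Judging from the call sites, the *_netmap_configure_tx_ring() helpers re-arm a freshly reset TX ring for netmap, while the RX-side helpers return non-zero when netmap owns the ring so that the driver's own buffer-allocation loop never runs and netmap's buffers stay posted. A schematic of the RX refill interception; the ring type and all names other than the helper pattern are placeholders.

/* RX refill interception as used by igb_alloc_rx_buffers() and
 * ixgbe_configure_rx_ring() above: if netmap owns the ring, its buffers
 * are already on the descriptors and the native loop must not touch them. */
#ifdef DEV_NETMAP
static void my_alloc_rx_buffers(struct my_rx_ring *rx_ring, u16 cleaned_count)
{
	if (my_netmap_configure_rx_ring(rx_ring))
		return;			/* netmap buffers already posted */

	/* ... native skb/page allocation loop would follow ... */
	(void)cleaned_count;
}
#endif /* DEV_NETMAP */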
-+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -826,6 +842,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - unsigned int total_bytes = 0, total_packets = 0; - u16 i, eop, count = 0; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. -+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->tx_buffer_info[i].next_to_watch; - eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop); -@@ -1308,6 +1334,13 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - u16 cleaned_count = 0; - bool pkt_is_rsc = false; - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. -+ */ -+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, work_done)) -+ return; -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i); - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); -@@ -2730,6 +2763,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -3094,6 +3130,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(ring, IXGBE_DESC_UNUSED(ring)); - } - -@@ -3882,6 +3922,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4121,6 +4165,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - /* Cleanup the affinity_hint CPU mask memory and callback */ - for (i = 0; i < num_q_vectors; i++) { - struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; -@@ -7450,6 +7498,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, - - e_dev_info("Intel(R) 10 Gigabit Network Connection\n"); - cards_found++; -+ -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - return 0; - - err_register: diff --git a/netmap/LINUX/final-patches/diff--ixgbe--30000--30100 b/netmap/LINUX/final-patches/diff--ixgbe--30000--30100 deleted file mode 100644 index be2ba86..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--30000--30100 +++ /dev/null @@ -1,113 +0,0 @@ -diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index 08e8e25..8070930 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -247,6 +247,22 @@ static const struct ixgbe_reg_info 
ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. -+ * -+ * contains functions for netmap support -+ * that extend the standard driver. -+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -864,6 +880,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - unsigned int total_bytes = 0, total_packets = 0; - u16 i, eop, count = 0; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. -+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ - i = tx_ring->next_to_clean; - eop = tx_ring->tx_buffer_info[i].next_to_watch; - eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop); -@@ -1348,6 +1374,13 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - u16 cleaned_count = 0; - bool pkt_is_rsc = false; - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. -+ */ -+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, work_done)) -+ return; -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i); - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); -@@ -2808,6 +2841,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -3183,6 +3219,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(ring, IXGBE_DESC_UNUSED(ring)); - } - -@@ -3976,6 +4016,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4212,6 +4256,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -7683,6 +7731,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, - - e_dev_info("Intel(R) 10 Gigabit Network Connection\n"); - cards_found++; -+ -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* 
DEV_NETMAP */ -+ - return 0; - - err_register: diff --git a/netmap/LINUX/final-patches/diff--ixgbe--30100--30200 b/netmap/LINUX/final-patches/diff--ixgbe--30100--30200 deleted file mode 100644 index 47a830e..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--30100--30200 +++ /dev/null @@ -1,114 +0,0 @@ -diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index e1fcc95..1aab0df 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -249,6 +249,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. -+ * -+ * contains functions for netmap support -+ * that extend the standard driver. -+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -801,6 +817,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - unsigned int total_bytes = 0, total_packets = 0; - u16 i, eop, count = 0; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. -+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ -+ - i = tx_ring->next_to_clean; - eop = tx_ring->tx_buffer_info[i].next_to_watch; - eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop); -@@ -1303,6 +1330,13 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - u16 cleaned_count = 0; - bool pkt_is_rsc = false; - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. 
-+ */ -+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, work_done)) -+ return; -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i); - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); -@@ -2676,6 +2710,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -3039,6 +3076,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring)); - } - -@@ -3873,6 +3914,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4126,6 +4171,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -7696,6 +7745,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, - - e_dev_info("Intel(R) 10 Gigabit Network Connection\n"); - cards_found++; -+ -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - return 0; - - err_register: diff --git a/netmap/LINUX/final-patches/diff--ixgbe--30200--30400 b/netmap/LINUX/final-patches/diff--ixgbe--30200--30400 deleted file mode 100644 index 57cee3b..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--30200--30400 +++ /dev/null @@ -1,115 +0,0 @@ -diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index 8ef92d1..6a37803 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -188,6 +188,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. -+ * -+ * contains functions for netmap support -+ * that extend the standard driver. -+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -745,6 +761,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - unsigned int budget = q_vector->tx.work_limit; - u16 i = tx_ring->next_to_clean; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. 
Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. -+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return true; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ -+ - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i); - -@@ -1253,6 +1280,14 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - u16 cleaned_count = 0; - bool pkt_is_rsc = false; - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. -+ */ -+ int dummy; -+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, &dummy)) -+ return true; -+#endif /* DEV_NETMAP */ - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i); - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); -@@ -2420,6 +2455,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -2783,6 +2821,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring)); - } - -@@ -3757,6 +3799,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4007,6 +4053,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -7710,6 +7760,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, - - e_dev_info("Intel(R) 10 Gigabit Network Connection\n"); - cards_found++; -+ -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - return 0; - - err_register: diff --git a/netmap/LINUX/final-patches/diff--ixgbe--30400--30500 b/netmap/LINUX/final-patches/diff--ixgbe--30400--30500 deleted file mode 100644 index a8375af..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--30400--30500 +++ /dev/null @@ -1,124 +0,0 @@ -diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index 467948e..0aa1511 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -204,6 +204,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. 
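One detail that varies between the hunks is the third argument of netmap_rx_irq(). It is an output counter: the r8169 patches later in this series return it directly as their receive count, virtio_net keeps it in a local work_done, and the newer igb/ixgbe hunks that only need the yes/no answer pass a throw-away int dummy as above. A sketch of the counting style, with placeholder names for everything except netmap_rx_irq().

/* RX poll path that forwards netmap's work count back to the caller,
 * as the r8169 hunks later in the series do.  my_rx_poll() and its
 * arguments are placeholders. */
#ifdef DEV_NETMAP
static int my_rx_poll(struct net_device *dev, int ring_nr, int budget)
{
	int count = 0;

	if (netmap_rx_irq(dev, ring_nr, &count))
		return count;	/* netmap already accounted the work */

	/* ... native receive loop bounded by budget ... */
	(void)budget;
	return count;
}
#endif /* DEV_NETMAP */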
-+ * -+ * contains functions for netmap support -+ * that extend the standard driver. -+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -749,6 +765,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - if (test_bit(__IXGBE_DOWN, &adapter->state)) - return true; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. -+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ -+ - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBE_TX_DESC(tx_ring, i); - i -= tx_ring->count; -@@ -1629,6 +1656,15 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - #endif /* IXGBE_FCOE */ - u16 cleaned_count = ixgbe_desc_unused(rx_ring); - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. -+ */ -+ int dummy; -+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy)) -+ return true; /* no more interrupts */ -+#endif /* DEV_NETMAP */ -+ - do { - struct ixgbe_rx_buffer *rx_buffer; - union ixgbe_adv_rx_desc *rx_desc; -@@ -2683,6 +2719,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -3032,6 +3071,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring)); - } - -@@ -3986,6 +4029,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4249,6 +4296,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4764,6 +4815,7 @@ static int ixgbe_open(struct net_device *netdev) - - ixgbe_up_complete(adapter); - -+ - return 0; - - err_req_irq: -@@ -7152,6 +7204,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, - - e_dev_info("%s\n", ixgbe_default_device_descr); - cards_found++; -+ -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - return 0; - - err_register: diff --git a/netmap/LINUX/final-patches/diff--ixgbe--30500--30900 b/netmap/LINUX/final-patches/diff--ixgbe--30500--30900 deleted file mode 100644 index 4b3c77a..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--30500--30900 +++ /dev/null @@ -1,123 +0,0 @@ -diff --git 
a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index e242104..02e1544 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -204,6 +204,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. -+ * -+ * contains functions for netmap support -+ * that extend the standard driver. -+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -764,6 +780,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - if (test_bit(__IXGBE_DOWN, &adapter->state)) - return true; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. -+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ -+ - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBE_TX_DESC(tx_ring, i); - i -= tx_ring->count; -@@ -1665,6 +1692,15 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - #endif /* IXGBE_FCOE */ - u16 cleaned_count = ixgbe_desc_unused(rx_ring); - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. 
-+ */ -+ int dummy; -+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy)) -+ return true; /* no more interrupts */ -+#endif /* DEV_NETMAP */ -+ - do { - struct ixgbe_rx_buffer *rx_buffer; - union ixgbe_adv_rx_desc *rx_desc; -@@ -2725,6 +2761,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -3102,6 +3141,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring)); - } - -@@ -4051,6 +4094,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4315,6 +4362,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4827,6 +4878,7 @@ static int ixgbe_open(struct net_device *netdev) - - ixgbe_up_complete(adapter); - -+ - return 0; - - err_req_irq: -@@ -7358,6 +7410,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, - e_err(probe, "failed to allocate sysfs resources\n"); - #endif /* CONFIG_IXGBE_HWMON */ - -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - return 0; - - err_register: diff --git a/netmap/LINUX/final-patches/diff--ixgbe--30900--30a00 b/netmap/LINUX/final-patches/diff--ixgbe--30900--30a00 deleted file mode 100644 index f98a21a..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--30900--30a00 +++ /dev/null @@ -1,134 +0,0 @@ -diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index 79f4a26..4b8a25b 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -202,6 +202,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. -+ * -+ * contains functions for netmap support -+ * that extend the standard driver. 
-+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -826,6 +842,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - if (test_bit(__IXGBE_DOWN, &adapter->state)) - return true; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. -+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ -+ - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBE_TX_DESC(tx_ring, i); - i -= tx_ring->count; -@@ -1860,6 +1887,15 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - #endif /* IXGBE_FCOE */ - u16 cleaned_count = ixgbe_desc_unused(rx_ring); - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. -+ */ -+ int dummy; -+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy)) -+ return true; /* no more interrupts */ -+#endif /* DEV_NETMAP */ -+ - do { - union ixgbe_adv_rx_desc *rx_desc; - struct sk_buff *skb; -@@ -2846,6 +2882,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -3207,6 +3246,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring)); - } - -@@ -4155,6 +4198,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4402,6 +4449,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4976,6 +5027,7 @@ static int ixgbe_open(struct net_device *netdev) - - ixgbe_up_complete(adapter); - -+ - return 0; - - err_set_queues: -@@ -7619,6 +7671,10 @@ skip_sriov: - ixgbe_dbg_adapter_init(adapter); - #endif /* CONFIG_DEBUG_FS */ - -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - return 0; - - err_register: -@@ -7653,6 +7709,10 @@ static void ixgbe_remove(struct pci_dev *pdev) - struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; - -+#ifdef DEV_NETMAP -+ netmap_detach(netdev); -+#endif /* DEV_NETMAP */ -+ - #ifdef CONFIG_DEBUG_FS - ixgbe_dbg_adapter_exit(adapter); - #endif /*CONFIG_DEBUG_FS */ diff --git a/netmap/LINUX/final-patches/diff--ixgbe--30a00--99999 
b/netmap/LINUX/final-patches/diff--ixgbe--30a00--99999 deleted file mode 100644 index 57451d7..0000000 --- a/netmap/LINUX/final-patches/diff--ixgbe--30a00--99999 +++ /dev/null @@ -1,123 +0,0 @@ -diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c -index d30fbdd..7418c57 100644 ---- a/ixgbe/ixgbe_main.c -+++ b/ixgbe/ixgbe_main.c -@@ -248,6 +248,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = { - {} - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to -+ * be a reference on how to implement netmap support in a driver. -+ * Additional comments are in ixgbe_netmap_linux.h . -+ * -+ * The code is originally developed on FreeBSD and in the interest -+ * of maintainability we try to limit differences between the two systems. -+ * -+ * contains functions for netmap support -+ * that extend the standard driver. -+ * It also defines DEV_NETMAP so further conditional sections use -+ * that instead of CONFIG_NETMAP -+ */ -+#include -+#endif - - /* - * ixgbe_regdump - register printout routine -@@ -872,6 +888,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - if (test_bit(__IXGBE_DOWN, &adapter->state)) - return true; - -+#ifdef DEV_NETMAP -+ /* -+ * In netmap mode, all the work is done in the context -+ * of the client thread. Interrupt handlers only wake up -+ * clients, which may be sleeping on individual rings -+ * or on a global resource for all rings. -+ */ -+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index)) -+ return 1; /* seems to be ignored */ -+#endif /* DEV_NETMAP */ -+ - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBE_TX_DESC(tx_ring, i); - i -= tx_ring->count; -@@ -1906,6 +1933,15 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - #endif /* IXGBE_FCOE */ - u16 cleaned_count = ixgbe_desc_unused(rx_ring); - -+#ifdef DEV_NETMAP -+ /* -+ * Same as the txeof routine: only wakeup clients on intr. 
-+ */ -+ int dummy; -+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy)) -+ return true; /* no more interrupts */ -+#endif /* DEV_NETMAP */ -+ - do { - union ixgbe_adv_rx_desc *rx_desc; - struct sk_buff *skb; -@@ -2905,6 +2941,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, - } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); - if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); -+#ifdef DEV_NETMAP -+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx); -+#endif /* DEV_NETMAP */ - } - - static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) -@@ -3266,6 +3305,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); - - ixgbe_rx_desc_queue_enable(adapter, ring); -+#ifdef DEV_NETMAP -+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx)) -+ return; -+#endif /* DEV_NETMAP */ - ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring)); - } - -@@ -4216,6 +4259,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(adapter->netdev); -+#endif -+ - /* bring the link up in the watchdog, this could race with our first - * link up interrupt but shouldn't be a problem */ - adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -4463,6 +4510,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter) - - ixgbe_napi_disable_all(adapter); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(netdev); -+#endif -+ - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); - adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; -@@ -5037,6 +5088,7 @@ static int ixgbe_open(struct net_device *netdev) - - ixgbe_up_complete(adapter); - -+ - return 0; - - err_set_queues: -@@ -7658,6 +7710,10 @@ skip_sriov: - IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL, - true); - -+#ifdef DEV_NETMAP -+ ixgbe_netmap_attach(adapter); -+#endif /* DEV_NETMAP */ -+ - return 0; - - err_register: diff --git a/netmap/LINUX/final-patches/diff--r8169.c--20620--20625 b/netmap/LINUX/final-patches/diff--r8169.c--20620--20625 deleted file mode 100644 index c159128..0000000 --- a/netmap/LINUX/final-patches/diff--r8169.c--20620--20625 +++ /dev/null @@ -1,117 +0,0 @@ -diff --git a/r8169.c b/r8169.c -index 0fe2fc9..efee0a4 100644 ---- a/r8169.c -+++ b/r8169.c -@@ -537,6 +537,10 @@ static int rtl8169_poll(struct napi_struct *napi, int budget); - static const unsigned int rtl8169_rx_config = - (RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - static void mdio_write(void __iomem *ioaddr, int reg_addr, int value) - { - int i; -@@ -3210,6 +3214,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - - device_set_wakeup_enable(&pdev->dev, tp->features & RTL_FEATURE_WOL); - -+#ifdef DEV_NETMAP -+ re_netmap_attach(tp); -+#endif /* DEV_NETMAP */ -+ - out: - return rc; - -@@ -3236,6 +3244,10 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev) - - unregister_netdev(dev); - -+#ifdef DEV_NETMAP -+ netmap_detach(dev); -+#endif /* DEV_NETMAP */ -+ - /* restore original MAC address */ - rtl_rar_set(tp, dev->perm_addr); - -@@ -3291,6 +3303,10 @@ static int rtl8169_open(struct net_device *dev) - - napi_enable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - rtl_hw_start(dev); - - 
rtl8169_request_timer(dev); -@@ -3993,6 +4009,11 @@ err_out: - static void rtl8169_rx_clear(struct rtl8169_private *tp) - { - unsigned int i; -+#ifdef DEV_NETMAP -+ re_netmap_tx_init(tp); -+ if (re_netmap_rx_init(tp)) -+ return 0; // success -+#endif /* DEV_NETMAP */ - - for (i = 0; i < NUM_RX_DESC; i++) { - if (tp->Rx_skbuff[i]) { -@@ -4112,11 +4133,19 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev) - /* Wait for any pending NAPI task to complete */ - napi_disable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - rtl8169_irq_mask_and_ack(ioaddr); - - tp->intr_mask = 0xffff; - RTL_W16(IntrMask, tp->intr_event); - napi_enable(&tp->napi); -+ -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(dev); -+#endif /* DEV_NETMAP */ - } - - static void rtl8169_reinit_task(struct work_struct *work) -@@ -4372,6 +4401,11 @@ static void rtl8169_tx_interrupt(struct net_device *dev, - { - unsigned int dirty_tx, tx_left; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(dev, 0)) -+ return; -+#endif /* DEV_NETMAP */ -+ - dirty_tx = tp->dirty_tx; - smp_rmb(); - tx_left = tp->cur_tx - dirty_tx; -@@ -4468,6 +4502,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev, - unsigned int cur_rx, rx_left; - unsigned int delta, count; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(dev, 0, &count)) -+ return count; -+#endif /* DEV_NETMAP */ -+ - cur_rx = tp->cur_rx; - rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx; - rx_left = min(rx_left, budget); -@@ -4687,7 +4726,12 @@ static void rtl8169_down(struct net_device *dev) - - napi_disable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - core_down: -+ - spin_lock_irq(&tp->lock); - - rtl8169_asic_down(ioaddr); diff --git a/netmap/LINUX/final-patches/diff--r8169.c--20625--20626 b/netmap/LINUX/final-patches/diff--r8169.c--20625--20626 deleted file mode 100644 index abdc0c8..0000000 --- a/netmap/LINUX/final-patches/diff--r8169.c--20625--20626 +++ /dev/null @@ -1,115 +0,0 @@ -diff --git a/r8169.c b/r8169.c -index 53b13de..745a59d 100644 ---- a/r8169.c -+++ b/r8169.c -@@ -535,6 +535,10 @@ static int rtl8169_poll(struct napi_struct *napi, int budget); - static const unsigned int rtl8169_rx_config = - (RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - static void mdio_write(void __iomem *ioaddr, int reg_addr, int value) - { - int i; -@@ -3229,6 +3233,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - if (pci_dev_run_wake(pdev)) - pm_runtime_put_noidle(&pdev->dev); - -+#ifdef DEV_NETMAP -+ re_netmap_attach(tp); -+#endif /* DEV_NETMAP */ -+ - out: - return rc; - -@@ -3257,6 +3265,10 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev) - if (pci_dev_run_wake(pdev)) - pm_runtime_get_noresume(&pdev->dev); - -+#ifdef DEV_NETMAP -+ netmap_detach(dev); -+#endif /* DEV_NETMAP */ -+ - /* restore original MAC address */ - rtl_rar_set(tp, dev->perm_addr); - -@@ -3303,6 +3315,10 @@ static int rtl8169_open(struct net_device *dev) - - napi_enable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - rtl_hw_start(dev); - - rtl8169_request_timer(dev); -@@ -4018,6 +4034,11 @@ static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc) - static int rtl8169_rx_fill(struct rtl8169_private *tp) - { - unsigned int i; -+#ifdef DEV_NETMAP -+ re_netmap_tx_init(tp); -+ if (re_netmap_rx_init(tp)) -+ 
return 0; // success -+#endif /* DEV_NETMAP */ - - for (i = 0; i < NUM_RX_DESC; i++) { - void *data; -@@ -4119,11 +4140,19 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev) - /* Wait for any pending NAPI task to complete */ - napi_disable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - rtl8169_irq_mask_and_ack(ioaddr); - - tp->intr_mask = 0xffff; - RTL_W16(IntrMask, tp->intr_event); - napi_enable(&tp->napi); -+ -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(dev); -+#endif /* DEV_NETMAP */ - } - - static void rtl8169_reinit_task(struct work_struct *work) -@@ -4395,6 +4424,11 @@ static void rtl8169_tx_interrupt(struct net_device *dev, - { - unsigned int dirty_tx, tx_left; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(dev, 0)) -+ return; -+#endif /* DEV_NETMAP */ -+ - dirty_tx = tp->dirty_tx; - smp_rmb(); - tx_left = tp->cur_tx - dirty_tx; -@@ -4490,6 +4524,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev, - unsigned int count; - int polling = (budget != ~(u32)0) ? 1 : 0; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(dev, 0, &count)) -+ return count; -+#endif /* DEV_NETMAP */ -+ - cur_rx = tp->cur_rx; - rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx; - rx_left = min(rx_left, budget); -@@ -4691,6 +4730,10 @@ static void rtl8169_down(struct net_device *dev) - - napi_disable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - spin_lock_irq(&tp->lock); - - rtl8169_asic_down(ioaddr); diff --git a/netmap/LINUX/final-patches/diff--r8169.c--20626--30200 b/netmap/LINUX/final-patches/diff--r8169.c--20626--30200 deleted file mode 100644 index 2eb1e36..0000000 --- a/netmap/LINUX/final-patches/diff--r8169.c--20626--30200 +++ /dev/null @@ -1,114 +0,0 @@ -diff --git a/r8169.c b/r8169.c -index 7ffdb80..6bae7e6 100644 ---- a/r8169.c -+++ b/r8169.c -@@ -590,6 +590,10 @@ static int rtl8169_poll(struct napi_struct *napi, int budget); - static const unsigned int rtl8169_rx_config = - (RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift); - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg) - { - void __iomem *ioaddr = tp->mmio_addr; -@@ -3207,6 +3211,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - if (pci_dev_run_wake(pdev)) - pm_runtime_put_noidle(&pdev->dev); - -+#ifdef DEV_NETMAP -+ re_netmap_attach(tp); -+#endif /* DEV_NETMAP */ -+ - netif_carrier_off(dev); - - out: -@@ -3238,6 +3246,9 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev) - cancel_delayed_work_sync(&tp->task); - - rtl_release_firmware(tp); -+#ifdef DEV_NETMAP -+ netmap_detach(dev); -+#endif /* DEV_NETMAP */ - - unregister_netdev(dev); - -@@ -3291,6 +3302,10 @@ static int rtl8169_open(struct net_device *dev) - - napi_enable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - rtl8169_init_phy(dev, tp); - - /* -@@ -4074,6 +4089,11 @@ static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc) - static int rtl8169_rx_fill(struct rtl8169_private *tp) - { - unsigned int i; -+#ifdef DEV_NETMAP -+ re_netmap_tx_init(tp); -+ if (re_netmap_rx_init(tp)) -+ return 0; // success -+#endif /* DEV_NETMAP */ - - for (i = 0; i < NUM_RX_DESC; i++) { - void *data; -@@ -4175,11 +4195,19 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev) - /* Wait for any pending NAPI task to complete */ - napi_disable(&tp->napi); - 
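Two r8169-specific details stand out in the hunks above: rtl8169_rx_fill() is short-circuited through re_netmap_tx_init()/re_netmap_rx_init() so the native buffer allocation is skipped when netmap owns the descriptors, and rtl8169_wait_for_quiescence() brackets the interrupt mask-and-ack with netmap_disable_all_rings()/netmap_enable_all_rings() so clients are parked while the NIC is quiesced. A sketch of that bracket; everything except the two netmap calls is the driver's existing sequence, abbreviated to comments.

/* Quiescence bracket added to rtl8169_wait_for_quiescence() above. */
static void my_wait_for_quiescence(struct net_device *dev)
{
	/* napi_disable(&tp->napi); */
#ifdef DEV_NETMAP
	netmap_disable_all_rings(dev);	/* park netmap clients as well */
#endif

	/* rtl8169_irq_mask_and_ack(ioaddr); ... re-arm the interrupt mask ... */

	/* napi_enable(&tp->napi); */
#ifdef DEV_NETMAP
	netmap_enable_all_rings(dev);	/* wake any sleeping clients */
#endif
}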
-+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - rtl8169_irq_mask_and_ack(ioaddr); - - tp->intr_mask = 0xffff; - RTL_W16(IntrMask, tp->intr_event); - napi_enable(&tp->napi); -+ -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(dev); -+#endif /* DEV_NETMAP */ - } - - static void rtl8169_reinit_task(struct work_struct *work) -@@ -4452,6 +4480,11 @@ static void rtl8169_tx_interrupt(struct net_device *dev, - { - unsigned int dirty_tx, tx_left; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(dev, 0)) -+ return; -+#endif /* DEV_NETMAP */ -+ - dirty_tx = tp->dirty_tx; - smp_rmb(); - tx_left = tp->cur_tx - dirty_tx; -@@ -4547,6 +4580,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev, - unsigned int count; - int polling = (budget != ~(u32)0) ? 1 : 0; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(dev, 0, &count)) -+ return count; -+#endif /* DEV_NETMAP */ -+ - cur_rx = tp->cur_rx; - rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx; - rx_left = min(rx_left, budget); -@@ -4769,6 +4807,10 @@ static void rtl8169_down(struct net_device *dev) - - napi_disable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - spin_lock_irq(&tp->lock); - - rtl8169_asic_down(ioaddr); diff --git a/netmap/LINUX/final-patches/diff--r8169.c--30200--30400 b/netmap/LINUX/final-patches/diff--r8169.c--30200--30400 deleted file mode 100644 index 39d301e..0000000 --- a/netmap/LINUX/final-patches/diff--r8169.c--30200--30400 +++ /dev/null @@ -1,114 +0,0 @@ -diff --git a/r8169.c b/r8169.c -index c8f47f1..a41e878 100644 ---- a/r8169.c -+++ b/r8169.c -@@ -787,6 +787,10 @@ static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force) - } - } - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg) - { - void __iomem *ioaddr = tp->mmio_addr; -@@ -4167,6 +4171,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - if (pci_dev_run_wake(pdev)) - pm_runtime_put_noidle(&pdev->dev); - -+#ifdef DEV_NETMAP -+ re_netmap_attach(tp); -+#endif /* DEV_NETMAP */ -+ - netif_carrier_off(dev); - - out: -@@ -4201,6 +4209,9 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev) - unregister_netdev(dev); - - rtl_release_firmware(tp); -+#ifdef DEV_NETMAP -+ netmap_detach(dev); -+#endif /* DEV_NETMAP */ - - if (pci_dev_run_wake(pdev)) - pm_runtime_get_noresume(&pdev->dev); -@@ -4298,6 +4309,10 @@ static int rtl8169_open(struct net_device *dev) - - napi_enable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - rtl8169_init_phy(dev, tp); - - rtl8169_set_features(dev, dev->features); -@@ -5252,6 +5267,11 @@ static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc) - static int rtl8169_rx_fill(struct rtl8169_private *tp) - { - unsigned int i; -+#ifdef DEV_NETMAP -+ re_netmap_tx_init(tp); -+ if (re_netmap_rx_init(tp)) -+ return 0; // success -+#endif /* DEV_NETMAP */ - - for (i = 0; i < NUM_RX_DESC; i++) { - void *data; -@@ -5348,11 +5368,19 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev) - /* Wait for any pending NAPI task to complete */ - napi_disable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - rtl8169_irq_mask_and_ack(tp); - - tp->intr_mask = 0xffff; - RTL_W16(IntrMask, tp->intr_event); - napi_enable(&tp->napi); -+ -+#ifdef DEV_NETMAP -+ netmap_enable_all_rings(dev); -+#endif /* DEV_NETMAP */ - } - - static void 
rtl8169_reinit_task(struct work_struct *work) -@@ -5627,6 +5655,11 @@ static void rtl8169_tx_interrupt(struct net_device *dev, - { - unsigned int dirty_tx, tx_left; - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(dev, 0)) -+ return; -+#endif /* DEV_NETMAP */ -+ - dirty_tx = tp->dirty_tx; - smp_rmb(); - tx_left = tp->cur_tx - dirty_tx; -@@ -5714,6 +5747,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev, - unsigned int cur_rx, rx_left; - unsigned int count; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(dev, 0, &count)) -+ return count; -+#endif /* DEV_NETMAP */ -+ - cur_rx = tp->cur_rx; - rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx; - rx_left = min(rx_left, budget); -@@ -5920,6 +5958,10 @@ static void rtl8169_down(struct net_device *dev) - - napi_disable(&tp->napi); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif /* DEV_NETMAP */ -+ - spin_lock_irq(&tp->lock); - - rtl8169_hw_reset(tp); diff --git a/netmap/LINUX/final-patches/diff--virtio_net.c--20622--20625 b/netmap/LINUX/final-patches/diff--virtio_net.c--20622--20625 deleted file mode 100644 index 7877f2a..0000000 --- a/netmap/LINUX/final-patches/diff--virtio_net.c--20622--20625 +++ /dev/null @@ -1,85 +0,0 @@ -diff --git a/virtio_net.c b/virtio_net.c -index b0577dd..6516934 100644 ---- a/virtio_net.c -+++ b/virtio_net.c -@@ -64,6 +64,10 @@ struct virtnet_info - struct page *pages; - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; -@@ -121,6 +125,10 @@ static void skb_xmit_done(struct virtqueue *svq) - /* Suppress further interrupts. */ - svq->vq_ops->disable_cb(svq); - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(vi->dev, 0)) -+ return; -+#endif - /* We were probably waiting for more output buffers. */ - netif_wake_queue(vi->dev); - } -@@ -470,7 +478,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget) - struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi); - void *buf; - unsigned int len, received = 0; -+#ifdef DEV_NETMAP -+ int work_done = 0; - -+ if (netmap_rx_irq(vi->dev, 0, &work_done)) { -+ napi_complete(napi); -+ ND("called netmap_rx_irq"); -+ -+ return 1; -+ } -+#endif - again: - while (received < budget && - (buf = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) { -@@ -638,6 +655,10 @@ static int virtnet_open(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - -+#ifdef DEV_NETMAP -+ virtio_netmap_init_buffers(vi); -+ netmap_enable_all_rings(dev); -+#endif - napi_enable(&vi->napi); - - /* If all buffers were filled by other side before we napi_enabled, we -@@ -700,6 +721,9 @@ static int virtnet_close(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif - napi_disable(&vi->napi); - - return 0; -@@ -985,6 +1009,10 @@ static int virtnet_probe(struct virtio_device *vdev) - goto unregister; - } - -+#ifdef DEV_NETMAP -+ virtio_netmap_attach(vi); -+#endif -+ - vi->status = VIRTIO_NET_S_LINK_UP; - virtnet_update_status(vi); - netif_carrier_on(dev); -@@ -1028,6 +1056,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev) - { - struct virtnet_info *vi = vdev->priv; - -+#ifdef DEV_NETMAP -+ netmap_detach(vi->dev); -+#endif - /* Stop all the virtqueues. 
*/ - vdev->config->reset(vdev); - diff --git a/netmap/LINUX/final-patches/diff--virtio_net.c--20625--20626 b/netmap/LINUX/final-patches/diff--virtio_net.c--20625--20626 deleted file mode 100644 index 5ad382f..0000000 --- a/netmap/LINUX/final-patches/diff--virtio_net.c--20625--20626 +++ /dev/null @@ -1,85 +0,0 @@ -diff --git a/virtio_net.c b/virtio_net.c -index b6d4028..a9be38d 100644 ---- a/virtio_net.c -+++ b/virtio_net.c -@@ -67,6 +67,10 @@ struct virtnet_info { - struct scatterlist tx_sg[MAX_SKB_FRAGS + 2]; - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; -@@ -124,6 +128,10 @@ static void skb_xmit_done(struct virtqueue *svq) - /* Suppress further interrupts. */ - virtqueue_disable_cb(svq); - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(vi->dev, 0)) -+ return; -+#endif - /* We were probably waiting for more output buffers. */ - netif_wake_queue(vi->dev); - } -@@ -467,7 +475,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget) - struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi); - void *buf; - unsigned int len, received = 0; -+#ifdef DEV_NETMAP -+ int work_done = 0; - -+ if (netmap_rx_irq(vi->dev, 0, &work_done)) { -+ napi_complete(napi); -+ ND("called netmap_rx_irq"); -+ -+ return 1; -+ } -+#endif - again: - while (received < budget && - (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) { -@@ -638,6 +655,10 @@ static int virtnet_open(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - -+#ifdef DEV_NETMAP -+ virtio_netmap_init_buffers(vi); -+ netmap_enable_all_rings(dev); -+#endif - napi_enable(&vi->napi); - - /* If all buffers were filled by other side before we napi_enabled, we -@@ -700,6 +721,9 @@ static int virtnet_close(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif - napi_disable(&vi->napi); - - return 0; -@@ -986,6 +1010,10 @@ static int virtnet_probe(struct virtio_device *vdev) - goto unregister; - } - -+#ifdef DEV_NETMAP -+ virtio_netmap_attach(vi); -+#endif -+ - /* Assume link up if device can't report link status, - otherwise get link status from config. */ - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { -@@ -1035,6 +1063,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev) - { - struct virtnet_info *vi = vdev->priv; - -+#ifdef DEV_NETMAP -+ netmap_detach(vi->dev); -+#endif - /* Stop all the virtqueues. */ - vdev->config->reset(vdev); - diff --git a/netmap/LINUX/final-patches/diff--virtio_net.c--20626--30300 b/netmap/LINUX/final-patches/diff--virtio_net.c--20626--30300 deleted file mode 100644 index 7585400..0000000 --- a/netmap/LINUX/final-patches/diff--virtio_net.c--20626--30300 +++ /dev/null @@ -1,85 +0,0 @@ -diff --git a/virtio_net.c b/virtio_net.c -index 82dba5a..f217797 100644 ---- a/virtio_net.c -+++ b/virtio_net.c -@@ -67,6 +67,10 @@ struct virtnet_info { - struct scatterlist tx_sg[MAX_SKB_FRAGS + 2]; - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; -@@ -124,6 +128,10 @@ static void skb_xmit_done(struct virtqueue *svq) - /* Suppress further interrupts. */ - virtqueue_disable_cb(svq); - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(vi->dev, 0)) -+ return; -+#endif - /* We were probably waiting for more output buffers. 
*/ - netif_wake_queue(vi->dev); - } -@@ -481,7 +489,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget) - struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi); - void *buf; - unsigned int len, received = 0; -+#ifdef DEV_NETMAP -+ int work_done = 0; - -+ if (netmap_rx_irq(vi->dev, 0, &work_done)) { -+ napi_complete(napi); -+ ND("called netmap_rx_irq"); -+ -+ return 1; -+ } -+#endif - again: - while (received < budget && - (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) { -@@ -652,6 +669,10 @@ static int virtnet_open(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - -+#ifdef DEV_NETMAP -+ virtio_netmap_init_buffers(vi); -+ netmap_enable_all_rings(dev); -+#endif - virtnet_napi_enable(vi); - return 0; - } -@@ -705,6 +726,9 @@ static int virtnet_close(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif - napi_disable(&vi->napi); - - return 0; -@@ -991,6 +1015,10 @@ static int virtnet_probe(struct virtio_device *vdev) - goto unregister; - } - -+#ifdef DEV_NETMAP -+ virtio_netmap_attach(vi); -+#endif -+ - /* Assume link up if device can't report link status, - otherwise get link status from config. */ - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { -@@ -1040,6 +1068,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev) - { - struct virtnet_info *vi = vdev->priv; - -+#ifdef DEV_NETMAP -+ netmap_detach(vi->dev); -+#endif - /* Stop all the virtqueues. */ - vdev->config->reset(vdev); - diff --git a/netmap/LINUX/final-patches/diff--virtio_net.c--30300--30500 b/netmap/LINUX/final-patches/diff--virtio_net.c--30300--30500 deleted file mode 100644 index bbad386..0000000 --- a/netmap/LINUX/final-patches/diff--virtio_net.c--30300--30500 +++ /dev/null @@ -1,90 +0,0 @@ -diff --git a/virtio_net.c b/virtio_net.c -index 4880aa8..6329c3a 100644 ---- a/virtio_net.c -+++ b/virtio_net.c -@@ -80,6 +80,10 @@ struct virtnet_info { - struct scatterlist tx_sg[MAX_SKB_FRAGS + 2]; - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; -@@ -137,6 +141,10 @@ static void skb_xmit_done(struct virtqueue *svq) - /* Suppress further interrupts. */ - virtqueue_disable_cb(svq); - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(vi->dev, 0)) -+ return; -+#endif - /* We were probably waiting for more output buffers. */ - netif_wake_queue(vi->dev); - } -@@ -517,7 +525,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget) - struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi); - void *buf; - unsigned int len, received = 0; -+#ifdef DEV_NETMAP -+ int work_done = 0; - -+ if (netmap_rx_irq(vi->dev, 0, &work_done)) { -+ napi_complete(napi); -+ ND("called netmap_rx_irq"); -+ -+ return 1; -+ } -+#endif - again: - while (received < budget && - (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) { -@@ -727,7 +744,15 @@ static void virtnet_netpoll(struct net_device *dev) - static int virtnet_open(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); -+#ifdef DEV_NETMAP -+ int ok = virtio_netmap_init_buffers(vi); - -+ netmap_enable_all_rings(dev); -+ if (ok) { -+ virtnet_napi_enable(vi); -+ return 0; -+ } -+#endif - /* Make sure we have some buffers: if oom use wq. 
*/ - if (!try_fill_recv(vi, GFP_KERNEL)) - queue_delayed_work(system_nrt_wq, &vi->refill, 0); -@@ -785,6 +810,9 @@ static int virtnet_close(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif - /* Make sure refill_work doesn't re-enable napi! */ - cancel_delayed_work_sync(&vi->refill); - napi_disable(&vi->napi); -@@ -1107,6 +1135,10 @@ static int virtnet_probe(struct virtio_device *vdev) - goto unregister; - } - -+#ifdef DEV_NETMAP -+ virtio_netmap_attach(vi); -+#endif -+ - /* Assume link up if device can't report link status, - otherwise get link status from config. */ - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { -@@ -1170,6 +1202,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev) - { - struct virtnet_info *vi = vdev->priv; - -+#ifdef DEV_NETMAP -+ netmap_detach(vi->dev); -+#endif - unregister_netdev(vi->dev); - - remove_vq_common(vi); diff --git a/netmap/LINUX/final-patches/diff--virtio_net.c--30500--30800 b/netmap/LINUX/final-patches/diff--virtio_net.c--30500--30800 deleted file mode 100644 index 821a503..0000000 --- a/netmap/LINUX/final-patches/diff--virtio_net.c--30500--30800 +++ /dev/null @@ -1,90 +0,0 @@ -diff --git a/virtio_net.c b/virtio_net.c -index f18149a..95e1580 100644 ---- a/virtio_net.c -+++ b/virtio_net.c -@@ -90,6 +90,10 @@ struct virtnet_info { - struct scatterlist tx_sg[MAX_SKB_FRAGS + 2]; - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; -@@ -147,6 +151,10 @@ static void skb_xmit_done(struct virtqueue *svq) - /* Suppress further interrupts. */ - virtqueue_disable_cb(svq); - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(vi->dev, 0)) -+ return; -+#endif - /* We were probably waiting for more output buffers. */ - netif_wake_queue(vi->dev); - } -@@ -529,7 +537,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget) - struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi); - void *buf; - unsigned int len, received = 0; -+#ifdef DEV_NETMAP -+ int work_done = 0; - -+ if (netmap_rx_irq(vi->dev, 0, &work_done)) { -+ napi_complete(napi); -+ ND("called netmap_rx_irq"); -+ -+ return 1; -+ } -+#endif - again: - while (received < budget && - (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) { -@@ -742,6 +759,15 @@ static void virtnet_netpoll(struct net_device *dev) - static int virtnet_open(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); -+#ifdef DEV_NETMAP -+ int ok = virtio_netmap_init_buffers(vi); -+ -+ netmap_enable_all_rings(dev); -+ if (ok) { -+ virtnet_napi_enable(vi); -+ return 0; -+ } -+#endif - - /* Make sure we have some buffers: if oom use wq. */ - if (!try_fill_recv(vi, GFP_KERNEL)) -@@ -810,6 +836,9 @@ static int virtnet_close(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif - /* Make sure refill_work doesn't re-enable napi! */ - cancel_delayed_work_sync(&vi->refill); - napi_disable(&vi->napi); -@@ -1148,6 +1177,10 @@ static int virtnet_probe(struct virtio_device *vdev) - goto unregister; - } - -+#ifdef DEV_NETMAP -+ virtio_netmap_attach(vi); -+#endif -+ - /* Assume link up if device can't report link status, - otherwise get link status from config. 
*/ - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { -@@ -1211,6 +1244,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev) - { - struct virtnet_info *vi = vdev->priv; - -+#ifdef DEV_NETMAP -+ netmap_detach(vi->dev); -+#endif - /* Prevent config work handler from accessing the device. */ - mutex_lock(&vi->config_lock); - vi->config_enable = false; diff --git a/netmap/LINUX/final-patches/diff--virtio_net.c--30800--30b00 b/netmap/LINUX/final-patches/diff--virtio_net.c--30800--30b00 deleted file mode 100644 index c896f5d..0000000 --- a/netmap/LINUX/final-patches/diff--virtio_net.c--30800--30b00 +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/virtio_net.c b/virtio_net.c -index 35c00c5..8aaaa7e 100644 ---- a/virtio_net.c -+++ b/virtio_net.c -@@ -132,6 +132,10 @@ struct virtnet_info { - struct notifier_block nb; - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; -@@ -211,6 +215,10 @@ static void skb_xmit_done(struct virtqueue *vq) - /* Suppress further interrupts. */ - virtqueue_disable_cb(vq); - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(vi->dev, vq2txq(vq))) -+ return; -+#endif - /* We were probably waiting for more output buffers. */ - netif_wake_subqueue(vi->dev, vq2txq(vq)); - } -@@ -603,7 +611,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget) - struct virtnet_info *vi = rq->vq->vdev->priv; - void *buf; - unsigned int len, received = 0; -+#ifdef DEV_NETMAP -+ int work_done = 0; - -+ if (netmap_rx_irq(vi->dev, vq2rxq(rq->vq), &work_done)) { -+ napi_complete(napi); -+ ND("called netmap_rx_irq"); -+ -+ return 1; -+ } -+#endif - again: - while (received < budget && - (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { -@@ -635,6 +652,16 @@ static int virtnet_open(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - int i; -+#ifdef DEV_NETMAP -+ int ok = virtio_netmap_init_buffers(vi); -+ -+ netmap_enable_all_rings(dev); -+ if (ok) { -+ for (i = 0; i < vi->max_queue_pairs; i++) -+ virtnet_napi_enable(&vi->rq[i]); -+ return 0; -+ } -+#endif - - for (i = 0; i < vi->max_queue_pairs; i++) { - /* Make sure we have some buffers: if oom use wq. */ -@@ -909,6 +936,9 @@ static int virtnet_close(struct net_device *dev) - struct virtnet_info *vi = netdev_priv(dev); - int i; - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif - /* Make sure refill_work doesn't re-enable napi! */ - cancel_delayed_work_sync(&vi->refill); - -@@ -1572,6 +1602,10 @@ static int virtnet_probe(struct virtio_device *vdev) - goto free_recv_bufs; - } - -+#ifdef DEV_NETMAP -+ virtio_netmap_attach(vi); -+#endif -+ - /* Assume link up if device can't report link status, - otherwise get link status from config. */ - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { -@@ -1618,6 +1652,9 @@ static void virtnet_remove(struct virtio_device *vdev) - { - struct virtnet_info *vi = vdev->priv; - -+#ifdef DEV_NETMAP -+ netmap_detach(vi->dev); -+#endif - unregister_hotcpu_notifier(&vi->nb); - - /* Prevent config work handler from accessing the device. 
*/ diff --git a/netmap/LINUX/final-patches/diff--virtio_net.c--30b00--99999 b/netmap/LINUX/final-patches/diff--virtio_net.c--30b00--99999 deleted file mode 100644 index f9f0fef..0000000 --- a/netmap/LINUX/final-patches/diff--virtio_net.c--30b00--99999 +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/virtio_net.c b/virtio_net.c -index 3d2a90a..ae899a4 100644 ---- a/virtio_net.c -+++ b/virtio_net.c -@@ -131,6 +131,10 @@ struct virtnet_info { - struct notifier_block nb; - }; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif -+ - struct skb_vnet_hdr { - union { - struct virtio_net_hdr hdr; -@@ -210,6 +214,10 @@ static void skb_xmit_done(struct virtqueue *vq) - /* Suppress further interrupts. */ - virtqueue_disable_cb(vq); - -+#ifdef DEV_NETMAP -+ if (netmap_tx_irq(vi->dev, vq2txq(vq))) -+ return; -+#endif - /* We were probably waiting for more output buffers. */ - netif_wake_subqueue(vi->dev, vq2txq(vq)); - } -@@ -603,7 +611,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget) - struct virtnet_info *vi = rq->vq->vdev->priv; - void *buf; - unsigned int r, len, received = 0; -+#ifdef DEV_NETMAP -+ int work_done = 0; - -+ if (netmap_rx_irq(vi->dev, vq2rxq(rq->vq), &work_done)) { -+ napi_complete(napi); -+ ND("called netmap_rx_irq"); -+ -+ return 1; -+ } -+#endif - again: - while (received < budget && - (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { -@@ -636,6 +653,16 @@ static int virtnet_open(struct net_device *dev) - { - struct virtnet_info *vi = netdev_priv(dev); - int i; -+#ifdef DEV_NETMAP -+ int ok = virtio_netmap_init_buffers(vi); -+ -+ netmap_enable_all_rings(dev); -+ if (ok) { -+ for (i = 0; i < vi->max_queue_pairs; i++) -+ virtnet_napi_enable(&vi->rq[i]); -+ return 0; -+ } -+#endif - - for (i = 0; i < vi->max_queue_pairs; i++) { - if (i < vi->curr_queue_pairs) -@@ -927,6 +954,9 @@ static int virtnet_close(struct net_device *dev) - struct virtnet_info *vi = netdev_priv(dev); - int i; - -+#ifdef DEV_NETMAP -+ netmap_disable_all_rings(dev); -+#endif - /* Make sure refill_work doesn't re-enable napi! */ - cancel_delayed_work_sync(&vi->refill); - -@@ -1592,6 +1622,10 @@ static int virtnet_probe(struct virtio_device *vdev) - goto free_recv_bufs; - } - -+#ifdef DEV_NETMAP -+ virtio_netmap_attach(vi); -+#endif -+ - /* Assume link up if device can't report link status, - otherwise get link status from config. */ - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { -@@ -1638,6 +1672,9 @@ static void virtnet_remove(struct virtio_device *vdev) - { - struct virtnet_info *vi = vdev->priv; - -+#ifdef DEV_NETMAP -+ netmap_detach(vi->dev); -+#endif - unregister_hotcpu_notifier(&vi->nb); - - /* Prevent config work handler from accessing the device. */ diff --git a/netmap/LINUX/forcedeth_netmap.h b/netmap/LINUX/forcedeth_netmap.h deleted file mode 100644 index 135104f..0000000 --- a/netmap/LINUX/forcedeth_netmap.h +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id: forcedeth_netmap.h 10670 2012-02-27 21:15:38Z luigi $ - * - * netmap support for: forcedeth (nfe, linux) - * For details on netmap support see ixgbe_netmap.h - -The driver supports ORIGinal and EXtended descriptors through unions. -We remove the .orig and .ex suffix for brevity. - -Pointers in the ring (N slots) are - first_rx = 0, last_rx = N-1, get_rx = put_rx = 0 at init -Following init there is a call to nv_alloc_rx_optimized() which does - less_rx = get_rx - 1 - for (put_rx = 0; put_rx != less_rx; put_rx++) - put_rx.flags = LEN | NV_RX2_AVAIL; -so it leaves one free slot and put_rx pointing at the end. -Basically, get_rx is where new packets arrive, put_rx is where -new buffers are added. - -The rx_intr aka nv_rx_process_optimized() scans - while (get_rx != put_rx && !(get_rx.flags & NV_RX2_AVAIL)) { - ... - get_rx++ - } -followed by a nv_alloc_rx_optimized(). -This makes sure that there is always a free slot. - - */ - -#include -#include -#include -#define SOFTC_T fe_priv - - -/* - * Register/unregister. We are already under netmap lock. - * only called on the first register or the last unregister. - * The "forcedeth" driver is poorly written, the reinit routine - * is replicated multiple times and one way to achieve it is to - * nv_change_mtu twice above ETH_DATA_LEN. - */ -static int -forcedeth_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *np = netdev_priv(ifp); - u8 __iomem *base = get_hwbase(ifp); - - // first half of nv_change_mtu() - down - nv_disable_irq(ifp); - nv_napi_disable(ifp); - netif_tx_lock_bh(ifp); - netif_addr_lock(ifp); - spin_lock(&np->lock); - /* stop engines */ - nv_stop_rxtx(ifp); - nv_txrx_reset(ifp); - /* drain rx queue */ - nv_drain_rxtx(ifp); - - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - // second half of nv_change_mtu() -- up - if (nv_init_ring(ifp)) { - if (!np->in_shutdown) - mod_timer(&np->oom_kick, jiffies + OOM_REFILL); - } - /* reinit nic view of the rx queue */ - writel(np->rx_buf_sz, base + NvRegOffloadConfig); - setup_hw_rings(ifp, NV_SETUP_RX_RING | NV_SETUP_TX_RING); - writel(((np->rx_ring_size-1) << NVREG_RINGSZ_RXSHIFT) + ((np->tx_ring_size-1) << NVREG_RINGSZ_TXSHIFT), - base + NvRegRingSizes); - pci_push(base); - writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(ifp) + NvRegTxRxControl); - pci_push(base); - /* restart rx engine */ - nv_start_rxtx(ifp); - spin_unlock(&np->lock); - netif_addr_unlock(ifp); - netif_tx_unlock_bh(ifp); - nv_napi_enable(ifp); - nv_enable_irq(ifp); - - return (0); -} - - -/* - * Reconcile kernel and user view of the transmit ring. 
- */ -static int -forcedeth_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - - /* device-specific */ - struct SOFTC_T *np = netdev_priv(ifp); - struct ring_desc_ex *txr = np->tx_ring.ex; - uint32_t lastpkt = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET); - u_int k; - - /* - * First part: process new packets to send. - */ - - if (!netif_carrier_ok(ifp)) { - goto out; - } - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = np->put_tx.ex - txr; // NIC pointer - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - struct ring_desc_ex *put_tx = txr + nic_i; - // XXX check who needs lastpkt - int cmd = (len - 1) | NV_TX2_VALID | lastpkt; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - put_tx->bufhigh = htole32(dma_high(paddr)); - put_tx->buflow = htole32(dma_low(paddr)); - put_tx->flaglen = htole32(cmd); - put_tx->txvlan = 0; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - np->put_tx.ex = txr + nic_i; - kring->nr_hwcur = head; - wmb(); /* synchronize writes to the NIC ring */ - /* restart tx unit where is the new index ? */ - writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, - get_hwbase(ifp) + NvRegTxRxControl); - } - - /* - * Second part: reclaim buffers for completed transmissions - */ - /* Sync the TX descriptor list */ - rmb(); - nic_i = np->get_tx.ex - txr; - k = np->put_tx.ex - txr; - if (nic_i != k) { - for (n = 0; nic_i != k; n++) { - uint32_t cmdstat = le32toh(txr[nic_i].flaglen); - if (cmdstat & NV_TX2_VALID) - break; - if (++nic_i == np->tx_ring_size) - nic_i = 0; - } - if (n > 0) { - np->get_tx.ex = txr + nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - } - -out: - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - */ -static int -forcedeth_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct SOFTC_T *np = netdev_priv(ifp); - struct ring_desc_ex *rxr = np->rx_ring.ex; - u_int refill; // refill position - - if (head > lim) - return netmap_ring_reinit(kring); - - /* - * First part: import newly received packets. - */ - rmb(); - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = np->get_rx.ex - rxr; /* next pkt to check */ - /* put_rx is the refill position, one before nr_hwcur. 
- * This slot is not available - */ - refill = np->put_rx.ex - rxr; /* refill position */ - nm_i = netmap_idx_n2k(kring, nic_i); - - while (nic_i != refill) { - uint32_t statlen = le32toh(rxr[nic_i].flaglen); - - if (statlen & NV_RX2_AVAIL) /* still owned by the NIC */ - break; - ring->slot[nm_i].len = statlen & LEN_MASK_V2; // XXX crc? - ring->slot[nm_i].flags = slot_flags; - // ifp->stats.rx_packets++; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - np->get_rx.ex = rxr + nic_i; - kring->nr_hwtail = nm_i; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; // refill is one before nic_i - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - refill = np->put_rx.ex - rxr; /* refill position */ - - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - struct ring_desc_ex *desc = rxr + nic_i; - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - - desc->flaglen = htole32(NETMAP_BUF_SIZE); - desc->bufhigh = htole32(dma_high(paddr)); - desc->buflow = htole32(dma_low(paddr)); - // enable the previous buffer - rxr[refill].flaglen |= htole32(NV_RX2_AVAIL); - refill = nm_next(refill, lim); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - np->put_rx.ex = rxr + refill; - /* Flush the RX DMA ring */ - wmb(); - } - - /* tell userspace that there are might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * Additional routines to init the tx and rx rings. - * In other drivers we do that inline in the main code. - */ -static int -forcedeth_netmap_tx_init(struct SOFTC_T *np) -{ - struct ring_desc_ex *desc; - int i, n; - struct netmap_adapter *na = NA(np->dev); - struct netmap_slot *slot; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - slot = netmap_reset(na, NR_TX, 0, 0); - /* slot is NULL if we are not in netmap mode */ - if (!slot) - return 0; - /* in netmap mode, overwrite addresses and maps */ - //txd = np->rl_ldata.rl_tx_desc; - desc = np->tx_ring.ex; - n = np->tx_ring_size; - - /* l points in the netmap ring, i points in the NIC ring */ - for (i = 0; i < n; i++) { - int l = netmap_idx_n2k(&na->tx_rings[0], i); - uint64_t paddr; - PNMB(slot + l, &paddr); - desc[i].flaglen = 0; - desc[i].bufhigh = htole32(dma_high(paddr)); - desc[i].buflow = htole32(dma_low(paddr)); - } - return 1; -} - - -static int -forcedeth_netmap_rx_init(struct SOFTC_T *np) -{ - struct netmap_adapter *na = NA(np->dev); - struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0); - struct ring_desc_ex *desc = np->rx_ring.ex; - uint32_t cmdstat; - int i, lim; - - if (!slot) - return 0; - /* - * Do not release the slots owned by userspace, - * and also keep one empty. 
- */ - lim = np->rx_ring_size - 1 - nm_kr_rxspace(&na->rx_rings[0]); - for (i = 0; i < np->rx_ring_size; i++) { - void *addr; - uint64_t paddr; - int l = netmap_idx_n2k(&na->rx_rings[0], i); - - addr = PNMB(slot + l, &paddr); - netmap_reload_map(np->rl_ldata.rl_rx_mtag, - np->rl_ldata.rl_rx_desc[i].rx_dmamap, addr); - desc[i].bufhigh = htole32(dma_high(paddr)); - desc[i].buflow = htole32(dma_low(paddr)); - cmdstat = NETMAP_BUF_SIZE; - if (i < lim) - cmdstat |= NV_RX2_AVAIL; - desc[i].flaglen = htole32(cmdstat); - } - // XXX ring end anywhere ? - np->get_rx.ex = desc; - np->put_rx.ex = desc + lim; - return 1; -} - - -static void -forcedeth_netmap_attach(struct SOFTC_T *np) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = np->dev; - na.num_tx_desc = np->tx_ring_size; - na.num_rx_desc = np->tx_ring_size; - na.nm_txsync = forcedeth_netmap_txsync; - na.nm_rxsync = forcedeth_netmap_rxsync; - na.nm_register = forcedeth_netmap_reg; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/LINUX/if_e1000_netmap.h b/netmap/LINUX/if_e1000_netmap.h deleted file mode 100644 index b30661d..0000000 --- a/netmap/LINUX/if_e1000_netmap.h +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright (C) 2012-2014 Gaetano Catalli, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id: if_e1000_netmap.h 10878 2012-04-12 22:28:48Z luigi $ - * - * netmap support for: e1000 (linux version) - * For details on netmap support please see ixgbe_netmap.h - */ - - -#include -#include -#include - -#define SOFTC_T e1000_adapter - - -/* - * Register/unregister. We are already under netmap lock. 
- */ -static int -e1000_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *adapter = netdev_priv(ifp); - - /* protect against other reinit */ - while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) - usleep_range(1000, 2000); - - rtnl_lock(); - if (netif_running(adapter->netdev)) - e1000_down(adapter); - - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - if (netif_running(adapter->netdev)) - e1000_up(adapter); - else - e1000_reset(adapter); - - rtnl_unlock(); - clear_bit(__E1000_RESETTING, &adapter->flags); - return (0); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -e1000_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* generate an interrupt approximately every half ring */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct SOFTC_T *adapter = netdev_priv(ifp); - struct e1000_tx_ring* txr = &adapter->tx_ring[ring_nr]; - - rmb(); - /* - * First part: process new packets to send. - */ - - if (!netif_carrier_ok(ifp)) { - goto out; - } - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - struct e1000_tx_desc *curr = E1000_TX_DESC(*txr, nic_i); - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? - E1000_TXD_CMD_RS : 0; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, paddr); - curr->buffer_addr = htole64(paddr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - curr->upper.data = 0; - curr->lower.data = htole32(adapter->txd_cmd | - len | flags | - E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - wmb(); /* synchronize writes to the NIC ring */ - txr->next_to_use = nic_i; /* XXX what for ? */ - /* (re)start the tx unit up to slot nic_i (excluded) */ - writel(nic_i, adapter->hw.hw_addr + txr->tdt); - mmiowb(); // XXX where do we need this ? - } - - /* - * Second part: reclaim buffers for completed transmissions. - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - /* record completed transmissions using TDH */ - nic_i = readl(adapter->hw.hw_addr + txr->tdh); - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - txr->next_to_clean = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } -out: - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. 
- */ -static int -e1000_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct SOFTC_T *adapter = netdev_priv(ifp); - struct e1000_rx_ring *rxr = &adapter->rx_ring[ring_nr]; - - if (!netif_carrier_ok(ifp)) { - goto out; - } - - if (head > lim) - return netmap_ring_reinit(kring); - - rmb(); - - /* - * First part: import newly received packets. - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = rxr->next_to_clean; - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { - struct e1000_rx_desc *curr = E1000_RX_DESC(*rxr, nic_i); - uint32_t staterr = le32toh(curr->status); - - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - ring->slot[nm_i].len = le16toh(curr->length) - 4; - ring->slot[nm_i].flags = slot_flags; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - rxr->next_to_clean = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - struct e1000_rx_desc *curr = E1000_RX_DESC(*rxr, nic_i); - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - if (slot->flags & NS_BUF_CHANGED) { - // netmap_reload_map(...) - curr->buffer_addr = htole64(paddr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->status = 0; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - rxr->next_to_use = nic_i; // XXX not really used - wmb(); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - writel(nic_i, adapter->hw.hw_addr + rxr->rdt); - } -out: - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* diagnostic routine to catch errors */ -static void e1000_no_rx_alloc(struct SOFTC_T *adapter, - struct e1000_rx_ring *rxr, int cleaned_count) -{ - D("e1000->alloc_rx_buf should not be called"); -} - - -/* - * Make the tx and rx rings point to the netmap buffers. 
- */ -static int e1000_netmap_init_buffers(struct SOFTC_T *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->netdev; - struct netmap_adapter* na = NA(ifp); - struct netmap_slot* slot; - struct e1000_tx_ring* txr = &adapter->tx_ring[0]; - unsigned int i, r, si; - uint64_t paddr; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - adapter->alloc_rx_buf = e1000_no_rx_alloc; - for (r = 0; r < na->num_rx_rings; r++) { - struct e1000_rx_ring *rxr; - slot = netmap_reset(na, NR_RX, r, 0); - if (!slot) { - D("strange, null netmap ring %d", r); - return 0; - } - rxr = &adapter->rx_ring[r]; - - for (i = 0; i < rxr->count; i++) { - // XXX the skb check and cleanup can go away - struct e1000_buffer *bi = &rxr->buffer_info[i]; - si = netmap_idx_n2k(&na->rx_rings[r], i); - PNMB(slot + si, &paddr); - if (bi->skb) - D("rx buf %d was set", i); - bi->skb = NULL; - // netmap_load_map(...) - E1000_RX_DESC(*rxr, i)->buffer_addr = htole64(paddr); - } - - rxr->next_to_use = 0; - /* preserve buffers already made available to clients */ - i = rxr->count - 1 - nm_kr_rxspace(&na->rx_rings[0]); - if (i < 0) // XXX something wrong here, can it really happen ? - i += rxr->count; - D("i now is %d", i); - wmb(); /* Force memory writes to complete */ - writel(i, hw->hw_addr + rxr->rdt); - } - /* now initialize the tx ring(s) */ - slot = netmap_reset(na, NR_TX, 0, 0); - for (i = 0; i < na->num_tx_desc; i++) { - si = netmap_idx_n2k(&na->tx_rings[0], i); - PNMB(slot + si, &paddr); - // netmap_load_map(...) - E1000_TX_DESC(*txr, i)->buffer_addr = htole64(paddr); - } - return 1; -} - - -static void -e1000_netmap_attach(struct SOFTC_T *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->netdev; - na.num_tx_desc = adapter->tx_ring[0].count; - na.num_rx_desc = adapter->rx_ring[0].count; - na.nm_register = e1000_netmap_reg; - na.nm_txsync = e1000_netmap_txsync; - na.nm_rxsync = e1000_netmap_rxsync; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/LINUX/if_e1000e_netmap.h b/netmap/LINUX/if_e1000e_netmap.h deleted file mode 100644 index 24f3b76..0000000 --- a/netmap/LINUX/if_e1000e_netmap.h +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (C) 2012-2014 Gaetano Catalli, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id: if_e1000e_netmap.h 10670 2012-02-27 21:15:38Z luigi $ - * - * netmap support for: e1000e (linux version) - * For details on netmap support please see ixgbe_netmap.h - * The driver supports 1 TX and 1 RX ring. Single lock. - * tx buffer address only written on change. - * Apparently the driver uses extended descriptors on rx from 3.2.32 - * Rx Crc stripping ? - */ - - -#include -#include -#include - -#define SOFTC_T e1000_adapter - -/* - * Adaptation to different versions of the driver. - */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0) -#warning this driver uses extended descriptors -#define NM_E1K_RX_DESC_T union e1000_rx_desc_extended -#define NM_E1R_RX_STATUS wb.upper.status_error -#define NM_E1R_RX_LENGTH wb.upper.length -#define NM_E1R_RX_BUFADDR read.buffer_addr -#else -#warning this driver uses regular descriptors -#define E1000_RX_DESC_EXT E1000_RX_DESC // XXX workaround -#define NM_E1K_RX_DESC_T struct e1000_rx_desc -#define NM_E1R_RX_STATUS status -#define NM_E1R_RX_BUFADDR buffer_addr -#define NM_E1R_RX_LENGTH length -#endif /* up to 3.2.x */ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) -#define NM_WR_TX_TAIL(_x) writel(_x, txr->tail) // XXX tx_ring -#define NM_WR_RX_TAIL(_x) writel(_x, rxr->tail) // XXX rx_ring -#define NM_RD_TX_HEAD() readl(txr->head) -#else -#define NM_WR_TX_TAIL(_x) writel(_x, adapter->hw.hw_addr + txr->tail) -#define NM_WR_RX_TAIL(_x) writel(_x, adapter->hw.hw_addr + rxr->tail) -#define NM_RD_TX_HEAD() readl(adapter->hw.hw_addr + txr->head) -#endif /* < 3.4.0 */ - - -/* - * Register/unregister. We are already under netmap lock. - */ -static int -e1000_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *adapter = netdev_priv(ifp); - - /* protect against other reinit */ - while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - - rtnl_lock(); - if (netif_running(adapter->netdev)) - e1000e_down(adapter); - - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - - if (netif_running(adapter->netdev)) - e1000e_up(adapter); - else - e1000e_reset(adapter); // XXX is it needed ? - - rtnl_unlock(); - - clear_bit(__E1000_RESETTING, &adapter->state); - return (0); -} - - -/* - * Reconcile kernel and user view of the transmit ring. 
- */ -static int -e1000_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* generate an interrupt approximately every half ring */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct SOFTC_T *adapter = netdev_priv(ifp); - struct e1000_ring* txr = &adapter->tx_ring[ring_nr]; - - rmb(); - /* - * First part: process new packets to send. - */ - - if (!netif_carrier_ok(ifp)) { - goto out; - } - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - struct e1000_tx_desc *curr = E1000_TX_DESC(*txr, nic_i); - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? - E1000_TXD_CMD_RS : 0; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr) - curr->buffer_addr = htole64(paddr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - curr->upper.data = 0; - curr->lower.data = htole32(adapter->txd_cmd | len | flags | - E1000_TXD_CMD_EOP); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - wmb(); /* synchronize writes to the NIC ring */ - - txr->next_to_use = nic_i; - NM_WR_TX_TAIL(nic_i); - mmiowb(); // XXX where do we need this ? - } - - /* - * Second part: reclaim buffers for completed transmissions. - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - /* record completed transmissions using TDH */ - nic_i = NM_RD_TX_HEAD(); // XXX could scan descriptors ? - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - txr->next_to_clean = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } -out: - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - */ -static int -e1000_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct SOFTC_T *adapter = netdev_priv(ifp); - struct e1000_ring *rxr = &adapter->rx_ring[ring_nr]; - - if (!netif_carrier_ok(ifp)) - return 0; - - if (head > lim) - return netmap_ring_reinit(kring); - - rmb(); - - /* - * First part: import newly received packets. - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - int strip_crc = (adapter->flags2 & FLAG2_CRC_STRIPPING) ? 
0 : 4; - - nic_i = rxr->next_to_clean; - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { - NM_E1K_RX_DESC_T *curr = E1000_RX_DESC_EXT(*rxr, nic_i); - uint32_t staterr = le32toh(curr->NM_E1R_RX_STATUS); - - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - ring->slot[nm_i].len = le16toh(curr->NM_E1R_RX_LENGTH) - strip_crc; - ring->slot[nm_i].flags = slot_flags; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - rxr->next_to_clean = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - NM_E1K_RX_DESC_T *curr = E1000_RX_DESC_EXT(*rxr, nic_i); - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - curr->NM_E1R_RX_BUFADDR = htole64(paddr); /* reload ext.desc. addr. */ - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr) - slot->flags &= ~NS_BUF_CHANGED; - } - curr->NM_E1R_RX_STATUS = 0; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - rxr->next_to_use = nic_i; // XXX not really used - wmb(); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - NM_WR_RX_TAIL(nic_i); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* diagnostic routine to catch errors */ -static void e1000e_no_rx_alloc(struct SOFTC_T *a, int n) -{ - D("e1000->alloc_rx_buf should not be called"); -} - - -/* - * Make the tx and rx rings point to the netmap buffers. - */ -static int e1000e_netmap_init_buffers(struct SOFTC_T *adapter) -{ - struct ifnet *ifp = adapter->netdev; - struct netmap_adapter* na = NA(ifp); - struct netmap_slot* slot; - struct e1000_ring *rxr = adapter->rx_ring; - struct e1000_ring *txr = adapter->tx_ring; - int i, si; - uint64_t paddr; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - slot = netmap_reset(na, NR_RX, 0, 0); - if (!slot) - return 0; // not in netmap mode XXX check is useless - - adapter->alloc_rx_buf = (void*)e1000e_no_rx_alloc; - for (i = 0; i < rxr->count; i++) { - // XXX the skb check and cleanup can go away - struct e1000_buffer *bi = &rxr->buffer_info[i]; - si = netmap_idx_n2k(&na->rx_rings[0], i); - PNMB(slot + si, &paddr); - if (bi->skb) - D("rx buf %d was set", i); - bi->skb = NULL; // XXX leak if set - // netmap_load_map(...) - E1000_RX_DESC_EXT(*rxr, i)->NM_E1R_RX_BUFADDR = htole64(paddr); - } - rxr->next_to_use = 0; - /* preserve buffers already made available to clients */ - i = rxr->count - 1 - nm_kr_rxspace(&na->rx_rings[0]); - wmb(); /* Force memory writes to complete */ - NM_WR_RX_TAIL(i); - - /* now initialize the tx ring */ - slot = netmap_reset(na, NR_TX, 0, 0); - for (i = 0; i < na->num_tx_desc; i++) { - si = netmap_idx_n2k(&na->tx_rings[0], i); - PNMB(slot + si, &paddr); - // netmap_load_map(...) 
- E1000_TX_DESC(*txr, i)->buffer_addr = htole64(paddr); - } - return 1; -} - - -static void -e1000_netmap_attach(struct SOFTC_T *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->netdev; - na.num_tx_desc = adapter->tx_ring->count; - na.num_rx_desc = adapter->rx_ring->count; - na.nm_register = e1000_netmap_reg; - na.nm_txsync = e1000_netmap_txsync; - na.nm_rxsync = e1000_netmap_rxsync; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/LINUX/if_igb_netmap.h b/netmap/LINUX/if_igb_netmap.h deleted file mode 100644 index 189a937..0000000 --- a/netmap/LINUX/if_igb_netmap.h +++ /dev/null @@ -1,400 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id: if_igb_netmap.h 10878 2012-04-12 22:28:48Z luigi $ - * - * netmap support for: igb (linux version) - * For details on netmap support please see ixgbe_netmap.h - */ - - -#include -#include -#include - -#define SOFTC_T igb_adapter - - -/* - * Adapt to different versions of the driver. - * E1000_TX_DESC_ADV etc. have dropped the _ADV suffix at some point. - * Also the first argument is now a pointer not the object. - */ -#ifndef E1000_TX_DESC_ADV -#define E1000_TX_DESC_ADV(_r, _i) IGB_TX_DESC(&(_r), _i) -#define E1000_RX_DESC_ADV(_r, _i) IGB_RX_DESC(&(_r), _i) -#define READ_TDH(_txr) ({struct e1000_hw *hw = &adapter->hw;rd32(E1000_TDH((_txr)->reg_idx));} ) -#else /* up to 3.2, approximately */ -#define igb_tx_buffer igb_buffer -#define tx_buffer_info buffer_info -#define igb_rx_buffer igb_buffer -#define rx_buffer_info buffer_info -#define READ_TDH(_txr) readl((_txr)->head) -#endif - - -/* - * Register/unregister. We are already under netmap lock. - * Only called on the first register or the last unregister. 
- */ -static int -igb_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *adapter = netdev_priv(ifp); - - /* protect against other reinit */ - while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - - rtnl_lock(); - if (netif_running(adapter->netdev)) - igb_down(adapter); - - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - if (netif_running(adapter->netdev)) - igb_up(adapter); - else - igb_reset(adapter); // XXX is it needed ? - - rtnl_unlock(); - - clear_bit(__IGB_RESETTING, &adapter->state); - return (0); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -igb_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* generate an interrupt approximately every half ring */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct SOFTC_T *adapter = netdev_priv(ifp); - struct igb_ring* txr = adapter->tx_ring[ring_nr]; - - rmb(); // XXX not in ixgbe ? - - /* - * First part: process new packets to send. - */ - if (!netif_carrier_ok(ifp)) { - goto out; - } - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - uint32_t olinfo_status=0; - - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - union e1000_adv_tx_desc *curr = - E1000_TX_DESC_ADV(*txr, nic_i); - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? - E1000_TXD_CMD_RS : 0; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - curr->read.buffer_addr = htole64(paddr); - // XXX check olinfo and cmd_type_len - curr->read.olinfo_status = - htole32(olinfo_status | - (len<< E1000_ADVTXD_PAYLEN_SHIFT)); - curr->read.cmd_type_len = htole32(len | flags | - E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT | - E1000_ADVTXD_DCMD_IFCS | E1000_TXD_CMD_EOP); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - wmb(); /* synchronize writes to the NIC ring */ - - txr->next_to_use = nic_i; /* XXX what for ? */ - /* (re)start the tx unit up to slot nic_i (excluded) */ - writel(nic_i, txr->tail); - mmiowb(); // XXX where do we need this ? - } - - /* - * Second part: reclaim buffers for completed transmissions. - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - /* record completed transmissions using TDH */ - nic_i = READ_TDH(txr); - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - txr->next_to_use = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } -out: - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. 
- */ -static int -igb_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct SOFTC_T *adapter = netdev_priv(ifp); - struct igb_ring *rxr = adapter->rx_ring[ring_nr]; - - if (!netif_carrier_ok(ifp)) - return 0; - - if (head > lim) - return netmap_ring_reinit(kring); - - rmb(); - - /* - * First part: import newly received packets. - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = rxr->next_to_clean; - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { - union e1000_adv_rx_desc *curr = - E1000_RX_DESC_ADV(*rxr, nic_i); - uint32_t staterr = le32toh(curr->wb.upper.status_error); - - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - ring->slot[nm_i].len = le16toh(curr->wb.upper.length); - ring->slot[nm_i].flags = slot_flags; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - rxr->next_to_clean = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - union e1000_adv_rx_desc *curr = E1000_RX_DESC_ADV(*rxr, nic_i); - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - // netmap_reload_map(pdev, DMA_FROM_DEVICE, old_paddr, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->read.pkt_addr = htole64(paddr); - curr->read.hdr_addr = 0; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - wmb(); - rxr->next_to_use = nic_i; // XXX not really used - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - writel(nic_i, rxr->tail); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -static int -igb_netmap_configure_tx_ring(struct SOFTC_T *adapter, int ring_nr) -{ - struct ifnet *ifp = adapter->netdev; - struct netmap_adapter* na = NA(ifp); - struct netmap_slot* slot; - struct igb_ring *txr = adapter->tx_ring[ring_nr]; - int i, si; - void *addr; - uint64_t paddr; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - slot = netmap_reset(na, NR_TX, ring_nr, 0); - if (!slot) - return 0; // XXX this should never happen - for (i = 0; i < na->num_tx_desc; i++) { - union e1000_adv_tx_desc *tx_desc; - si = netmap_idx_n2k(&na->tx_rings[ring_nr], i); - addr = PNMB(slot + si, &paddr); - tx_desc = E1000_TX_DESC_ADV(*txr, i); - tx_desc->read.buffer_addr = htole64(paddr); - /* actually we don't care to init the rings here */ - } - return 1; // success -} - - -static int -igb_netmap_configure_rx_ring(struct igb_ring *rxr) -{ - struct ifnet *ifp = rxr->netdev; - struct netmap_adapter* na = NA(ifp); - int reg_idx = rxr->reg_idx; - 
struct netmap_slot* slot; - u_int i; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - /* - * XXX watch out, the main driver must not use - * split headers. The buffer len should be written - * into wr32(E1000_SRRCTL(reg_idx), srrctl) with options - * something like - * srrctl = ALIGN(buffer_len, 1024) >> - * E1000_SRRCTL_BSIZEPKT_SHIFT; - * srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; - * srrctl |= E1000_SRRCTL_DROP_EN; - */ - slot = netmap_reset(na, NR_RX, reg_idx, 0); - if (!slot) - return 0; // not in netmap mode - - for (i = 0; i < rxr->count; i++) { - union e1000_adv_rx_desc *rx_desc; - uint64_t paddr; - int si = netmap_idx_n2k(&na->rx_rings[reg_idx], i); - -#if 0 - // XXX the skb check can go away - struct igb_rx_buffer *bi = &rxr->rx_buffer_info[i]; - if (bi->skb) - D("rx buf %d was set", i); - bi->skb = NULL; // XXX leak if set -#endif /* useless */ - - PNMB(slot + si, &paddr); - rx_desc = E1000_RX_DESC_ADV(*rxr, i); - rx_desc->read.hdr_addr = 0; - rx_desc->read.pkt_addr = htole64(paddr); - } - rxr->next_to_use = 0; - /* preserve buffers already made available to clients */ - i = rxr->count - 1 - nm_kr_rxspace(&na->rx_rings[reg_idx]); - - wmb(); /* Force memory writes to complete */ - ND("%s rxr%d.tail %d", ifp->if_xname, reg_idx, i); - writel(i, rxr->tail); - return 1; // success -} - - -static void -igb_netmap_attach(struct SOFTC_T *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->netdev; - na.num_tx_desc = adapter->tx_ring_count; - na.num_rx_desc = adapter->rx_ring_count; - na.nm_register = igb_netmap_reg; - na.nm_txsync = igb_netmap_txsync; - na.nm_rxsync = igb_netmap_rxsync; - na.num_tx_rings = adapter->num_tx_queues; - na.num_rx_rings = adapter->num_rx_queues; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/LINUX/if_re_netmap_linux.h b/netmap/LINUX/if_re_netmap_linux.h deleted file mode 100644 index 613afbb..0000000 --- a/netmap/LINUX/if_re_netmap_linux.h +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - - -/* - * $Id: if_re_netmap_linux.h 10679 2012-02-28 13:42:18Z luigi $ - * - * netmap support for: r8169 (re, linux version) - * For details on netmap support please see ixgbe_netmap.h - * 1 tx ring, 1 rx ring, 1 lock, crcstrip ? reinit tx addr, - */ - - -#include -#include -#include - - -static void rtl8169_wait_for_quiescence(struct ifnet *); -#define SOFTC_T rtl8169_private - - -/* - * Register/unregister, mostly the reinit task - */ -static int -re_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - int error = 0; - - rtnl_lock(); - rtl8169_wait_for_quiescence(ifp); - rtl8169_close(ifp); - - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - - if (rtl8169_open(ifp) < 0) { - error = ENOMEM; - goto fail; - } - } else { -fail: - nm_clear_native_flags(na); - error = rtl8169_open(ifp) ? EINVAL : 0; - } - rtnl_unlock(); - return (error); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -re_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - - /* device-specific */ - struct SOFTC_T *sc = netdev_priv(ifp); - void __iomem *ioaddr = sc->mmio_addr; - - rmb(); - - /* - * First part: process new packets to send. - */ - if (!netif_carrier_ok(ifp)) { - goto out; - } - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = sc->cur_tx; // XXX use internal macro ? - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - struct TxDesc *curr = &sc->TxDescArray[nic_i]; - uint32_t flags = slot->len | LastFrag | DescOwn | FirstFrag ; - - NM_CHECK_ADDR_LEN(addr, len); - - if (nic_i == lim) /* mark end of ring */ - flags |= RingEnd; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr); - curr->addr = htole64(paddr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - curr->opts1 = htole32(flags); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - sc->cur_tx = nic_i; - wmb(); /* synchronize writes to the NIC ring */ - RTL_W8(TxPoll, NPQ); /* start ? */ - } - - /* - * Second part: reclaim buffers for completed transmissions. - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - for (n = 0, nic_i = sc->dirty_tx; nic_i != sc->cur_tx; n++) { - if (le32toh(sc->TxDescArray[nic_i].opts1) & DescOwn) - break; - if (++nic_i == NUM_TX_DESC) - nic_i = 0; - } - if (n > 0) { - sc->dirty_tx = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - } -out: - nm_txsync_finalize(kring); - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. 
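A note on the descriptor format used by the txsync above: each netmap buffer is sent as a single fragment, so the opts1 command word carries the length, FirstFrag|LastFrag, DescOwn to hand the descriptor to the NIC, and RingEnd only on the physically last slot so the hardware wraps. A hedged sketch of that composition (hypothetical helper, not part of the driver; the flag macros are the r8169 driver's own):

static inline uint32_t sketch_re_tx_opts1(unsigned int len, unsigned int nic_i,
					  unsigned int lim)
{
	uint32_t flags = len | FirstFrag | LastFrag | DescOwn;

	if (nic_i == lim)		/* last physical descriptor */
		flags |= RingEnd;
	return flags;			/* stored as htole32(flags) in TxDesc->opts1 */
}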
- */ -static int -re_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct SOFTC_T *sc = netdev_priv(ifp); - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - if (!netif_carrier_ok(ifp)) - return 0; - - if (head > lim) - return netmap_ring_reinit(kring); - - rmb(); - /* - * First part: import newly received packets. - * - * NOTE: This device uses all the buffers in the ring, so we - * need another termination condition in addition to DescOwn - * cleared (all buffers could have it cleared. The easiest one - * is to stop right before nm_hwcur. - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - uint32_t stop_i = nm_prev(kring->nr_hwcur, lim); - - nic_i = sc->cur_rx; /* next pkt to check */ - nm_i = netmap_idx_n2k(kring, nic_i); - - while (nm_i != stop_i) { - struct RxDesc *cur_rx = &sc->RxDescArray[nic_i]; - uint32_t rxstat = le32toh(cur_rx->opts1); - uint32_t total_len; - - if ((rxstat & DescOwn) != 0) - break; - total_len = rxstat & 0x00001FFF; - /* XXX subtract crc */ - total_len = (total_len < 4) ? 0 : total_len - 4; - ring->slot[nm_i].len = total_len; - ring->slot[nm_i].flags = slot_flags; - // ifp->stats.rx_packets++; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - sc->cur_rx = nic_i; - kring->nr_hwtail = nm_i; - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - struct RxDesc *curr = &sc->RxDescArray[nic_i]; - uint32_t flags = NETMAP_BUF_SIZE | DescOwn; - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (nic_i == lim) /* mark end of ring */ - flags |= RingEnd; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr); - curr->addr = htole64(paddr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->opts1 = htole32(flags); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - wmb(); // XXX needed ? - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * Additional routines to init the tx and rx rings. - * In other drivers we do that inline in the main code. 
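The init helpers that follow, like the analogous routines in the other drivers above, all share the same entry check: netmap_reset() returns the netmap slot array for the ring when the interface is in native netmap mode and NULL otherwise, so each helper bails out early and leaves the standard driver path untouched. A minimal sketch of the pattern, with the descriptor programming elided:

static int sketch_ring_init(struct netmap_adapter *na)
{
	struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);

	if (slot == NULL)
		return 0;	/* not in netmap mode: let the driver do its own init */
	/* ... program each NIC descriptor from slot[netmap_idx_n2k(...)] ... */
	return 1;		/* ring has been taken over by netmap */
}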
- */ -static int -re_netmap_tx_init(struct SOFTC_T *sc) -{ - struct netmap_adapter *na = NA(sc->dev); - struct netmap_slot *slot; - struct TxDesc *desc = sc->TxDescArray; - int i, l; - uint64_t paddr; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - slot = netmap_reset(na, NR_TX, 0, 0); - /* slot is NULL if we are not in netmap mode XXX cannot happen */ - if (!slot) - return 0; - - /* l points in the netmap ring, i points in the NIC ring */ - for (i = 0; i < na->num_tx_desc; i++) { - l = netmap_idx_n2k(&na->tx_rings[0], i); - PNMB(slot + l, &paddr); - desc[i].addr = htole64(paddr); - } - return 1; -} - - -static int -re_netmap_rx_init(struct SOFTC_T *sc) -{ - struct netmap_adapter *na = NA(sc->dev); - struct netmap_slot *slot; - struct RxDesc *desc = sc->RxDescArray; - uint32_t cmdstat; - int i, lim, l; - uint64_t paddr; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - slot = netmap_reset(na, NR_RX, 0, 0); - if (!slot) - return 0; /* XXX cannot happen */ - /* - * Do not release the slots owned by userspace - * XXX we use all slots, so no '-1' here - * XXX do we need -1 instead ? - */ - lim = na->num_rx_desc /* - 1 */ - nm_kr_rxspace(&na->rx_rings[0]); - for (i = 0; i < na->num_rx_desc; i++) { - l = netmap_idx_n2k(&na->rx_rings[0], i); - PNMB(slot + l, &paddr); - cmdstat = NETMAP_BUF_SIZE; - if (i == na->num_rx_desc - 1) - cmdstat |= RingEnd; - if (i < lim) - cmdstat |= DescOwn; - desc[i].opts1 = htole32(cmdstat); - desc[i].addr = htole64(paddr); - } - return 1; -} - - -static void -re_netmap_attach(struct SOFTC_T *sc) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = sc->dev; - na.num_tx_desc = NUM_TX_DESC; - na.num_rx_desc = NUM_RX_DESC; - na.nm_txsync = re_netmap_txsync; - na.nm_rxsync = re_netmap_rxsync; - na.nm_register = re_netmap_reg; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/LINUX/ixgbe_netmap_linux.h b/netmap/LINUX/ixgbe_netmap_linux.h deleted file mode 100644 index e4ce396..0000000 --- a/netmap/LINUX/ixgbe_netmap_linux.h +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* - * $FreeBSD: head/sys/dev/netmap/ixgbe_netmap.h 230572 2012-01-26 09:55:16Z luigi $ - * - * netmap support for: ixgbe (LINUX version) - * - * This file is meant to be a reference on how to implement - * netmap support for a network driver. - * This file contains code but only static or inline functions used - * by a single driver. To avoid replication of code we just #include - * it near the beginning of the standard driver. - */ - - -#include -#include -#include - -#define SOFTC_T ixgbe_adapter - -/* - * Adaptation to different versions of the driver. - * Recent drivers (3.4 and above) redefine some macros - */ -#ifndef IXGBE_TX_DESC_ADV -#define IXGBE_TX_DESC_ADV IXGBE_TX_DESC -#define IXGBE_RX_DESC_ADV IXGBE_RX_DESC -#endif - - -/* - * Register/unregister. We are already under netmap lock. - * Only called on the first register or the last unregister. - */ -static int -ixgbe_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *adapter = netdev_priv(ifp); - - // adapter->netdev->trans_start = jiffies; // disable watchdog ? - /* protect against other reinit */ - while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) - usleep_range(1000, 2000); - - rtnl_lock(); - if (netif_running(adapter->netdev)) - ixgbe_down(adapter); - - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - /* XXX SRIOV migth need another 2sec wait */ - if (netif_running(adapter->netdev)) - ixgbe_up(adapter); /* also enables intr */ - rtnl_unlock(); - - clear_bit(__IXGBE_RESETTING, &adapter->state); - return (0); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - * - * Userspace wants to send packets up to the one before ring->head, - * kernel knows kring->nr_hwcur is the first unsent packet. - * - * Here we push packets out (as many as possible), and possibly - * reclaim buffers from previously completed transmission. - * - * ring->tail is updated on return. - * ring->head is never used here. - * - * The caller (netmap) guarantees that there is only one instance - * running at any time. Any interference with other driver - * methods should be handled by the individual drivers. - */ -static int -ixgbe_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* - * interrupts on every tx packet are expensive so request - * them every half ring, or where NS_REPORT is set - */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct SOFTC_T *adapter = netdev_priv(ifp); - struct ixgbe_ring *txr = adapter->tx_ring[ring_nr]; - int reclaim_tx; - - /* - * First part: process new packets to send. - * nm_i is the current index in the netmap ring, - * nic_i is the corresponding index in the NIC ring. - * The two numbers differ because upon a *_init() we reset - * the NIC ring but leave the netmap ring unchanged. - * For the transmit ring, we have - * - * nm_i = kring->nr_hwcur - * nic_i = IXGBE_TDT (not tracked in the driver) - * and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - * - * In this driver kring->nkr_hwofs >= 0, but for other - * drivers it might be negative as well. 
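The index relation spelled out above can be made concrete with a small stand-alone sketch (the real translations are netmap_idx_k2n()/netmap_idx_n2k(); here hwofs stands for kring->nkr_hwofs and n for the ring size):

static inline unsigned int sketch_idx_k2n(int nm_i, int hwofs, int n)
{
	int i = nm_i - hwofs;		/* netmap index -> NIC index */
	return (unsigned int)(i < 0 ? i + n : (i >= n ? i - n : i));
}

static inline unsigned int sketch_idx_n2k(int nic_i, int hwofs, int n)
{
	int i = nic_i + hwofs;		/* NIC index -> netmap index */
	return (unsigned int)(i < 0 ? i + n : (i >= n ? i - n : i));
}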
- */ - - /* - * If we have packets to send (kring->nr_hwcur != ring->cur) - * iterate over the netmap ring, fetch length and update - * the corresponding slot in the NIC ring. Some drivers also - * need to update the buffer's physical address in the NIC slot - * even NS_BUF_CHANGED is not set (PNMB computes the addresses). - * - * The netmap_reload_map() calls is especially expensive, - * even when (as in this case) the tag is 0, so do only - * when the buffer has actually changed. - * - * If possible do not set the report/intr bit on all slots, - * but only a few times per ring or when NS_REPORT is set. - * - * Finally, on 10G and faster drivers, it might be useful - * to prefetch the next slot and txr entry. - */ - - if (!netif_carrier_ok(ifp)) { - goto out; - } - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - union ixgbe_adv_tx_desc *curr = IXGBE_TX_DESC_ADV(txr, nic_i); - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? - IXGBE_TXD_CMD_RS : 0; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - curr->read.buffer_addr = htole64(paddr); - curr->read.olinfo_status = htole32(len << IXGBE_ADVTXD_PAYLEN_SHIFT); - curr->read.cmd_type_len = htole32(len | flags | - IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - wmb(); /* synchronize writes to the NIC ring */ - /* (re)start the tx unit up to slot nic_i (excluded) */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->reg_idx), nic_i); - } - - /* - * Second part: reclaim buffers for completed transmissions. - * Because this is expensive (we read a NIC register etc.) - * we only do it in specific cases (see below). - */ - if (flags & NAF_FORCE_RECLAIM) { - reclaim_tx = 1; /* forced reclaim */ - } else if (!nm_kr_txempty(kring)) { - reclaim_tx = 0; /* have buffers, no reclaim */ - } else { - /* - * No buffers available. Locate previous slot with - * REPORT_STATUS set. - * If the slot has DD set, we can reclaim space, - * otherwise wait for the next interrupt. - * This enables interrupt moderation on the tx - * side though it might reduce throughput. - */ - union ixgbe_adv_tx_desc *txd = IXGBE_TX_DESC_ADV(txr, 0); - - nic_i = txr->next_to_clean + report_frequency; - if (nic_i > lim) - nic_i -= lim + 1; - // round to the closest with dd set - nic_i = (nic_i < kring->nkr_num_slots / 4 || - nic_i >= kring->nkr_num_slots*3/4) ? - 0 : report_frequency; - reclaim_tx = txd[nic_i].wb.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ? - } - if (reclaim_tx) { - /* - * Record completed transmissions. - * We (re)use the driver's txr->next_to_clean to keep - * track of the most recently completed transmission. - * - * The datasheet discourages the use of TDH to find - * out the number of sent packets, but we only set - * REPORT STATUS in a few slots so TDH is the only - * good way. - */ - nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr)); - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? 
*/ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - txr->next_to_clean = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } -out: - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - * Same as for the txsync, this routine must be efficient. - * The caller guarantees a single invocations, but races against - * the rest of the driver should be handled here. - * - * When called, userspace has released buffers up to ring->head - * (last one excluded). - * - * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective - * of whether or not we received an interrupt. - */ -static int -ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct SOFTC_T *adapter = netdev_priv(ifp); - struct ixgbe_ring *rxr = adapter->rx_ring[ring_nr]; - - if (!netif_carrier_ok(ifp)) - return 0; - - if (head > lim) - return netmap_ring_reinit(kring); - - rmb(); - - /* - * First part: import newly received packets. - * - * nm_i is the index of the next free slot in the netmap ring, - * nic_i is the index of the next received packet in the NIC ring, - * and they may differ in case if_init() has been called while - * in netmap mode. For the receive ring we have - * - * nm_i = (kring->nr_hwtail) - * nic_i = rxr->next_to_clean; // really next to check - * and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - * - * rxr->next_to_clean is set to 0 on a ring reinit - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = rxr->next_to_clean; - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { - union ixgbe_adv_rx_desc *curr = IXGBE_RX_DESC_ADV(rxr, nic_i); - uint32_t staterr = le32toh(curr->wb.upper.status_error); - - if ((staterr & IXGBE_RXD_STAT_DD) == 0) - break; - ring->slot[nm_i].len = le16toh(curr->wb.upper.length); - ring->slot[nm_i].flags = slot_flags; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - rxr->next_to_clean = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - * (kring->nr_hwcur to ring->head excluded), - * and make the buffers available for reception. 
- * As usual nm_i is the index in the netmap ring, - * nic_i is the index in the NIC ring, and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - union ixgbe_adv_rx_desc *curr = IXGBE_RX_DESC_ADV(rxr, nic_i); - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->wb.upper.status_error = 0; - curr->read.pkt_addr = htole64(paddr); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - rxr->next_to_use = nic_i; // XXX not really used - wmb(); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->reg_idx), nic_i); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * if in netmap mode, attach the netmap buffers to the ring and return true. - * Otherwise return false. - */ -static int -ixgbe_netmap_configure_tx_ring(struct SOFTC_T *adapter, int ring_nr) -{ - struct netmap_adapter *na = NA(adapter->netdev); - struct netmap_slot *slot; - //int j; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - slot = netmap_reset(na, NR_TX, ring_nr, 0); - if (!slot) - return 0; // not in netmap; XXX cannot happen -#if 0 - /* - * on a generic card we should set the address in the slot. - * But on the ixgbe, the address needs to be rewritten - * after a transmission so there is nothing do to except - * loading the map. - */ - for (j = 0; j < na->num_tx_desc; j++) { - int sj = netmap_idx_n2k(&na->tx_rings[ring_nr], j); - uint64_t paddr; - void *addr = PNMB(slot + sj, &paddr); - } -#endif - return 1; -} - - -static int -ixgbe_netmap_configure_rx_ring(struct SOFTC_T *adapter, int ring_nr) -{ - /* - * In netmap mode, we must preserve the buffers made - * available to userspace before the if_init() - * (this is true by default on the TX side, because - * init makes all buffers available to userspace). - * - * netmap_reset() and the device-specific routines - * (e.g. ixgbe_setup_receive_rings()) map these - * buffers at the end of the NIC ring, so here we - * must set the RDT (tail) register to make sure - * they are not overwritten. - * - * In this driver the NIC ring starts at RDH = 0, - * RDT points to the last slot available for reception (?), - * so RDT = num_rx_desc - 1 means the whole ring is available. - */ - struct netmap_adapter *na = NA(adapter->netdev); - struct netmap_slot *slot; - int lim, i; - struct ixgbe_ring *ring = adapter->rx_ring[ring_nr]; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - slot = netmap_reset(na, NR_RX, ring_nr, 0); - /* same as in ixgbe_setup_transmit_ring() */ - if (!slot) - return 0; // not in netmap; XXX cannot happen - - lim = na->num_rx_desc - 1 - nm_kr_rxspace(&na->rx_rings[ring_nr]); - - for (i = 0; i < na->num_rx_desc; i++) { - /* - * Fill the map and set the buffer address in the NIC ring, - * considering the offset between the netmap and NIC rings - * (see comment in ixgbe_setup_transmit_ring() ). 
- */ - int si = netmap_idx_n2k(&na->rx_rings[ring_nr], i); - uint64_t paddr; - PNMB(slot + si, &paddr); - // netmap_load_map(rxr->ptag, rxbuf->pmap, addr); - /* Update descriptor */ - IXGBE_RX_DESC_ADV(ring, i)->read.pkt_addr = htole64(paddr); - } - IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(ring_nr), lim); - return 1; -} - - -/* - * The attach routine, called near the end of ixgbe_attach(), - * fills the parameters for netmap_attach() and calls it. - * It cannot fail, in the worst case (such as no memory) - * netmap mode will be disabled and the driver will only - * operate in standard mode. - */ -static void -ixgbe_netmap_attach(struct SOFTC_T *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->netdev; - na.num_tx_desc = adapter->tx_ring[0]->count; - na.num_rx_desc = adapter->rx_ring[0]->count; - na.nm_txsync = ixgbe_netmap_txsync; - na.nm_rxsync = ixgbe_netmap_rxsync; - na.nm_register = ixgbe_netmap_reg; - na.num_tx_rings = adapter->num_tx_queues; - na.num_rx_rings = adapter->num_rx_queues; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/LINUX/mlx4_netmap_linux.h b/netmap/LINUX/mlx4_netmap_linux.h deleted file mode 100644 index 12b2d9f..0000000 --- a/netmap/LINUX/mlx4_netmap_linux.h +++ /dev/null @@ -1,736 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id: mlx4_netmap_linux.h $ - * - * netmap support for mlx4 (LINUX version) - * - */ - - -#include -#include -#include -#define SOFTC_T mlx4_en_priv - -/* - * This driver is split in multiple small files. - * The main device descriptor has type struct mlx4_en_priv *priv; - * and we attach to the device in mlx4_en_init_netdev() - * (do port numbers start from 1 ?) - * - * The reconfig routine is in mlx4_en_start_port() (also here) - * which is called on a mlx4_en_restart() (watchdog), open and set-mtu. - * - * priv->num_frags ?? - * DS_SIZE ?? - * apparently each rx desc is followed by frag.descriptors - * and the rx desc is rounded up to a power of 2. 
- * - * Receive code is in en_rx.c - * priv->rx_ring_num number of rx rings - * rxr = prov->rx_ring[ring_ind] rx ring descriptor - * rxr->size number of slots - * rxr->prod producer - * probably written into a mmio reg at *rxr->wqres.db.db - * trimmed to 16 bits. - * - * Rx init routine: - * mlx4_en_activate_rx_rings() - * mlx4_en_init_rx_desc() - * Transmit code is in en_tx.c - */ - -int mlx4_netmap_rx_config(struct SOFTC_T *priv, int ring_nr); -int mlx4_netmap_tx_config(struct SOFTC_T *priv, int ring_nr); - -int mlx4_tx_desc_dump(struct mlx4_en_tx_desc *tx_desc); - -#ifdef NETMAP_MLX4_MAIN -static inline void -nm_pkt_dump(int i, char *buf, int len) -{ - uint8_t *s __attribute__((unused)) = buf+6, *d __attribute__((unused)) = buf; - - RD(10, "%d len %4d %02x:%02x:%02x:%02x:%02x:%02x -> %02x:%02x:%02x:%02x:%02x:%02x", - i, - len, - s[0], s[1], s[2], s[3], s[4], s[5], - d[0], d[1], d[2], d[3], d[4], d[5]); -} - -/* show the content of the descriptor. Only the first block is printed - * to make sure we do not fail on wraparounds (otherwise we would need - * base, index and ring size). - */ -int -mlx4_tx_desc_dump(struct mlx4_en_tx_desc *tx_desc) -{ - struct mlx4_wqe_ctrl_seg *ctrl = &tx_desc->ctrl; - uint32_t *p = (uint32_t *)tx_desc; - int i, l = ctrl->fence_size; - - RD(5,"------- txdesc %p size 0x%x", tx_desc, ctrl->fence_size); - if (l > 4) - l = 4; - for (i = 0; i < l; i++) { - RD(20, "[%2d]: 0x%08x 0x%08x 0x%08x 0x%08x", i, - ntohl(p[0]), ntohl(p[1]), ntohl(p[2]), ntohl(p[3])); - p += 4; - } - return 0; -} - - -/* - * Register/unregister. We are already under (netmap) core lock. - * Only called on the first register or the last unregister. - */ -static int -mlx4_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *priv = netdev_priv(ifp); - int error = 0, need_load = 0; - struct mlx4_en_dev *mdev = priv->mdev; - - /* - * On enable, flush pending ops, set flag and reinit rings. - * On disable, flush again, and restart the interface. - */ - D("setting netmap mode for %s to %s", ifp->if_xname, onoff ? "ON" : "OFF"); - // rtnl_lock(); // ??? 
- if (netif_running(ifp)) { - D("unloading %s", ifp->if_xname); - //double_mutex_state_lock(mdev); - mutex_lock(&mdev->state_lock); - if (onoff == 0) { - int i; - /* coming from netmap mode, clean up the ring pointers - * so we do not crash in mlx4_en_free_tx_buf() - * XXX should STAMP the txdesc value to pretend the hw got there - * 0x7fffffff plus the bit set to - * !!(ring->cons & ring->size) - */ - for (i = 0; i < na->num_tx_rings; i++) { - struct mlx4_en_tx_ring *txr = &priv->tx_ring[i]; - ND("txr %d : cons %d prod %d txbb %d", i, txr->cons, txr->prod, txr->last_nr_txbb); - txr->cons += txr->last_nr_txbb; // XXX should be 1 - for (;txr->cons != txr->prod; txr->cons++) { - uint16_t j = txr->cons & txr->size_mask; - uint32_t new_val, *ptr = (uint32_t *)(txr->buf + j * TXBB_SIZE); - new_val = cpu_to_be32(STAMP_VAL | (!!(txr->cons & txr->size) << STAMP_SHIFT)); - ND(10, "old 0x%08x new 0x%08x", *ptr, new_val); - *ptr = new_val; - } - } - } - mlx4_en_stop_port(ifp); - need_load = 1; - } - -retry: - if (onoff) { /* enable netmap mode */ - nm_set_native_flags(na); - } else { /* reset normal mode */ - nm_clear_native_flags(na); - } - if (need_load) { - D("loading %s", ifp->if_xname); - error = mlx4_en_start_port(ifp); - D("start_port returns %d", error); - if (error && onoff) { - onoff = 0; - goto retry; - } - mutex_unlock(&mdev->state_lock); - //double_mutex_state_unlock(mdev); - } - // rtnl_unlock(); - return (error); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - * This routine might be called frequently so it must be efficient. - * - -OUTGOING (txr->prod) -Tx packets need to fill a 64-byte block with one control block and -one descriptor (both 16-byte). Probably we need to fill the other -two data entries in the block with NULL entries as done in rx_config(). -One can request completion reports (intr) on all entries or only -on selected ones. The std. driver reports every 16 packets. - -txr->prod points to the first available slot to send. - -COMPLETION (txr->cons) -TX events are reported through a Completion Queue (CQ) whose entries -can be 32 or 64 bytes. In case of 64 bytes, the interesting part is -at odd indexes. The "factor" variable does the addressing. - -txr->cons points to the last completed block (XXX note so it is 1 behind) - -There is no link back from the txring to the completion -queue so we need to track it ourselves. HOWEVER mlx4_en_alloc_resources() -uses the same index for cq and ring so tx_cq and tx_ring correspond, -same for rx_cq and rx_ring. - - */ -static int -mlx4_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) -{ - struct ifnet *ifp = na->ifp; - struct netmap_kring *kring = &na->tx_rings[ring_nr]; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* - * interrupts on every tx packet are expensive so request - * them every half ring, or where NS_REPORT is set - */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - struct SOFTC_T *priv = netdev_priv(ifp); - int error = 0; - - if (!netif_carrier_ok(ifp)) { - goto out; - } - - // XXX debugging, only print if sending something - n = (txr->prod - txr->cons - 1) & 0xffffff; // should be modulo 2^24 ? - if (n >= txr->size) { - RD(5, "XXXXXXXXXXX txr %d overflow: cons %u prod %u size %d delta %d", - ring_nr, txr->cons, txr->prod, txr->size, n); - } - - /* - * First part: process new packets to send. 
- */ - nm_i = kring->nr_hwcur; - // XXX debugging, assuming lim is 2^x-1 - n = 0; // XXX debugging - if (nm_i != head) { /* we have new packets to send */ - ND(5,"START: txr %u cons %u prod %u hwcur %u head %u tail %d send %d", - ring_nr, txr->cons, txr->prod, kring->nr_hwcur, ring->head, kring->nr_hwtail, - (head - nm_i) & lim); - - // XXX see en_tx.c :: mlx4_en_xmit() - /* - * In netmap the descriptor has one control segment - * and one data segment. The control segment is 16 bytes, - * the data segment is another 16 bytes mlx4_wqe_data_seg. - * The alignment is TXBB_SIZE (64 bytes) though, so we are - * forced to use 64 bytes each. - */ - - ND(10,"=======>========== send from %d to %d at bd %d", j, k, txr->prod); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - uint32_t l = txr->prod & txr->size_mask; - struct mlx4_en_tx_desc *tx_desc = txr->buf + l * TXBB_SIZE; - struct mlx4_wqe_ctrl_seg *ctrl = &tx_desc->ctrl; - - NM_CHECK_ADDR_LEN(addr, len); - - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, unload and reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - /* - * Fill the slot in the NIC ring. - */ - ctrl->vlan_tag = 0; // not used - ctrl->ins_vlan = 0; // NO - ctrl->fence_size = 2; // used descriptor size in 16byte blocks - // request notification. XXX later report only if NS_REPORT or not too often. - ctrl->srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | - MLX4_WQE_CTRL_SOLICITED); - - // XXX do we need to copy the mac dst address ? - if (1) { // XXX do we need this ? - uint64_t mac = mlx4_en_mac_to_u64(addr); - uint32_t mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); - uint32_t mac_l = (u32) (mac & 0xffffffff); - - ctrl->srcrb_flags |= cpu_to_be32(mac_h); - ctrl->imm = cpu_to_be32(mac_l); - } - - tx_desc->data.addr = cpu_to_be64(paddr); - tx_desc->data.lkey = cpu_to_be32(priv->mdev->mr.key); - wmb(); // XXX why here ? - tx_desc->data.byte_count = cpu_to_be32(len); // XXX crc corrupt ? - wmb(); - ctrl->owner_opcode = cpu_to_be32( - MLX4_OPCODE_SEND | - ((txr->prod & txr->size) ? MLX4_EN_BIT_DESC_OWN : 0) ); - txr->prod++; - nm_i = nm_next(nm_i, lim); - } - kring->nr_hwcur = head; - - /* XXX Check how to deal with nkr_hwofs */ - /* these two are always in sync. */ - wmb(); /* synchronize writes to the NIC ring */ - /* (re)start the transmitter up to slot l (excluded) */ - ND(5, "doorbell cid %d data 0x%x", txdata->cid, txdata->tx_db.raw); - // XXX is this doorbell correct ? - iowrite32be(txr->doorbell_qpn, txr->bf.uar->map + MLX4_SEND_DOORBELL); - } - // XXX debugging, only print if sent something - if (n) - ND(5, "SENT: txr %d cons %u prod %u hwcur %u cur %u tail %d sent %d", - ring_nr, txr->cons, txr->prod, kring->nr_hwcur, ring->cur, kring->nr_hwtail, n); - - /* - * Second part: reclaim buffers for completed transmissions. - */ - - { - struct mlx4_en_cq *cq = &priv->tx_cq[ring_nr]; - struct mlx4_cq *mcq = &cq->mcq; - - int size = cq->size; // number of entries - struct mlx4_cqe *buf = cq->buf; // base of cq entries - uint32_t size_mask = txr->size_mask; // same in txq and cq ?....... - uint16_t new_index, ring_index; - int factor = priv->cqe_factor; // 1 for 64 bytes, 0 for 32 bytes - - /* - * Reclaim buffers for completed transmissions. The CQE tells us - * where the consumer (NIC) is. Bit 7 of the owner_sr_opcode - * is the ownership bit. 
It toggles up and down so the - * non-bitwise XNOR trick lets us detect toggles as the ring - * wraps around. On even rounds, the second operand is 0 so - * we exit when the MLX4_CQE_OWNER_MASK bit is 1, viceversa - * on odd rounds. - */ - new_index = ring_index = txr->cons & size_mask; - - for (n = 0; n < 2*lim; n++) { - uint16_t index = mcq->cons_index & size_mask; - struct mlx4_cqe *cqe = &buf[(index << factor) + factor]; - - if (!XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, - mcq->cons_index & size)) - break; - /* - * make sure we read the CQE after we read the - * ownership bit - */ - rmb(); - - /* Skip over last polled CQE */ - new_index = be16_to_cpu(cqe->wqe_index) & size_mask; - ND(5, "txq %d new_index %d", ring_nr, new_index); - mcq->cons_index++; - } - if (n > lim) { - D("XXXXXXXXXXX too many notifications %d", n); - } - /* now we have updated cons-index, notify the card. */ - /* XXX can we make it conditional ? */ - wmb(); - mlx4_cq_set_ci(mcq); - // XXX the following enables interrupts... */ - // mlx4_en_arm_cq(priv, cq); // XXX always ? - wmb(); - /* XXX unsigned arithmetic below */ - n = (new_index - ring_index) & size_mask; - if (n) { - ND(5, "txr %d completed %d packets", ring_nr, n); - txr->cons += n; - /* XXX watch out, index is probably modulo */ - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, (new_index & size_mask)), lim); - } - if (nm_kr_txempty(kring)) { - mlx4_en_arm_cq(priv, cq); - } - } - -out: - nm_txsync_finalize(kring); - return 0; - -err: - if (error) - return netmap_ring_reinit(kring); - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - -MELLANOX: - -the ring has prod and cons indexes, the size is a power of 2, -size and actual_size indicate how many entries can be allocated, -stride is the size of each entry. - -mlx4_en_update_rx_prod_db() tells the NIC where it can go -(to be used when new buffers are freed). - - */ -static int -mlx4_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) -{ - struct ifnet *ifp = na->ifp; - struct netmap_kring *kring = &na->rx_rings[ring_nr]; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - struct SOFTC_T *priv = netdev_priv(ifp); - struct mlx4_en_rx_ring *rxr = &priv->rx_ring[ring_nr]; - - if (!priv->port_up) // XXX as in mlx4_en_process_rx_cq() - return 0; - - if (!netif_carrier_ok(ifp)) // XXX maybe above is redundant ? - return 0; - - if (head > lim) - return netmap_ring_reinit(kring); - - ND(5, "START rxr %d cons %d prod %d kcur %d ktail %d cur %d tail %d", - ring_nr, rxr->cons, rxr->prod, kring->nr_hwcur, kring->nr_hwtail, ring->cur, ring->tail); - - /* - * First part, import newly received packets. - */ - - /* scan the completion queue to see what is going on. - * The mapping is 1:1. The hardware toggles the OWNER bit in the - * descriptor at mcq->cons_index & size_mask, which is mapped 1:1 - * to an entry in the RXR. 
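The ownership test described in the two comments above can be isolated into a small sketch (hypothetical helper; MLX4_CQE_OWNER_MASK is bit 7 of owner_sr_opcode as noted above, and 'size' is the CQ size, a power of two). Each full pass over the queue flips the expected value, which is why the loops compare against cons_index & size rather than against a fixed bit:

static inline int sketch_cqe_is_sw_owned(uint8_t owner_sr_opcode,
					 uint32_t cons_index, uint32_t size)
{
	int hw_toggle = !!(owner_sr_opcode & MLX4_CQE_OWNER_MASK);
	int odd_round = !!(cons_index & size);	/* parity of the current pass */

	return hw_toggle == odd_round;	/* equal: CQE completed, keep draining */
}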
- * XXX there are two notifications sent to the hw: - * mlx4_cq_set_ci(struct mlx4_cq *cq); - * *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff); - * mlx4_en_update_rx_prod_db(rxr); - * *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); - * apparently they point to the same memory word - * (see mlx4_en_activate_cq() ) and are initialized to 0 - * DB is the doorbell page (sec.15.1.2 ?) - * wqres is set in mlx4_alloc_hwq_res() - * and in turn mlx4_alloc_hwq_res() - */ - if (1 || netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - struct mlx4_en_cq *cq = &priv->rx_cq[ring_nr]; - struct mlx4_cq *mcq = &cq->mcq; - int factor = priv->cqe_factor; - uint32_t size_mask = rxr->size_mask; - int size = cq->size; - struct mlx4_cqe *buf = cq->buf; - - nm_i = kring->nr_hwtail; - - /* Process all completed CQEs, use same logic as in TX */ - for (n = 0; n <= 2*lim ; n++) { - int index = mcq->cons_index & size_mask; - struct mlx4_cqe *cqe = &buf[(index << factor) + factor]; - prefetch(cqe+1); - if (!XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, mcq->cons_index & size)) - break; - - rmb(); /* make sure data is up to date */ - ring->slot[nm_i].len = be32_to_cpu(cqe->byte_cnt) - rxr->fcs_del; - ring->slot[nm_i].flags = slot_flags; - mcq->cons_index++; - nm_i = nm_next(nm_i, lim); - } - if (n) { /* update the state variables */ - if (n >= 2*lim) - D("XXXXXXXXXXXXX too many received packets %d", n); - ND(5, "received %d packets", n); - kring->nr_hwtail = nm_i; - rxr->cons += n; - ND(5, "RECVD %d rxr %d cons %d prod %d kcur %d ktail %d cur %d tail %d", - n, - ring_nr, rxr->cons, rxr->prod, kring->nr_hwcur, kring->nr_hwtail, ring->cur, ring->tail); - - /* XXX ack completion queue */ - mlx4_cq_set_ci(mcq); - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; /* netmap ring index */ - if (nm_i != head) { /* userspace has released some packets. */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - /* collect per-slot info, with similar validations - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - struct mlx4_en_rx_desc *rx_desc = rxr->buf + (nic_i * rxr->stride); - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - - /* XXX - * The rx descriptor only contains buffer descriptors, - * probably only the length is changed or not even that one. - */ - // see mlx4_en_prepare_rx_desc() and mlx4_en_alloc_frag() - rx_desc->data[0].addr = cpu_to_be64(paddr); - rx_desc->data[0].byte_count = cpu_to_be32(NETMAP_BUF_SIZE); - rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key); - -#if 0 - int jj, possible_frags; - /* we only use one fragment, so the rest is padding */ - possible_frags = (rxr->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; - for (jj = 1; jj < possible_frags; jj++) { - rx_desc->data[jj].byte_count = 0; - rx_desc->data[jj].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); - rx_desc->data[jj].addr = 0; - } -#endif - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - - /* XXX note that mcq->cons_index and ring->cons are not in sync */ - wmb(); - rxr->prod += n; - kring->nr_hwcur = head; - - /* and now tell the system that there are more buffers available. 
- * should use mlx4_en_update_rx_prod_db(rxr) but it is static in - * en_rx.c so we do not see it here - */ - *rxr->wqres.db.db = cpu_to_be32(rxr->prod & 0xffff); - - ND(5, "FREED rxr %d cons %d prod %d kcur %d ktail %d", - ring_nr, rxr->cons, rxr->prod, - kring->nr_hwcur, kring->nr_hwtail); - } - - /* tell userspace that there are new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * If in netmap mode, attach the netmap buffers to the ring and return true. - * Otherwise return false. - * Called at the end of mlx4_en_start_port(). - * XXX TODO: still incomplete. - */ -int -mlx4_netmap_tx_config(struct SOFTC_T *priv, int ring_nr) -{ - struct netmap_adapter *na = NA(priv->dev); - struct netmap_slot *slot; - struct mlx4_en_cq *cq; - - ND(5, "priv %p ring_nr %d", priv, ring_nr); - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - -/* - CONFIGURE TX RINGS IN NETMAP MODE - little if anything to do - The main code does - mlx4_en_activate_cq() - mlx4_en_activate_tx_ring() - - - */ - slot = netmap_reset(na, NR_TX, ring_nr, 0); - if (!slot) - return 0; // not in netmap mode; - ND(5, "init tx ring %d with %d slots (driver %d)", ring_nr, - na->num_tx_desc, - priv->tx_ring[ring_nr].size); - /* enable interrupts on the netmap queues */ - cq = &priv->tx_cq[ring_nr]; // derive from the txring - - return 1; -} - -int -mlx4_netmap_rx_config(struct SOFTC_T *priv, int ring_nr) -{ - struct netmap_adapter *na = NA(priv->dev); - struct netmap_slot *slot; - struct mlx4_en_rx_ring *rxr; - struct netmap_kring *kring; - int i, j, possible_frags; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - /* - * on the receive ring, must set buf addresses into the slots. - - The ring is activated by mlx4_en_activate_rx_rings(), near the end - the rx ring is also 'started' with mlx4_en_update_rx_prod_db() - so we patch into that routine. - - */ - slot = netmap_reset(na, NR_RX, ring_nr, 0); - if (!slot) // XXX should not happen - return 0; - kring = &na->rx_rings[ring_nr]; - rxr = &priv->rx_ring[ring_nr]; - ND(20, "ring %d slots %d (driver says %d) frags %d stride %d", ring_nr, - kring->nkr_num_slots, rxr->actual_size, priv->num_frags, rxr->stride); - rxr->prod--; // XXX avoid wraparounds ? 
- if (kring->nkr_num_slots != rxr->actual_size) { - D("mismatch between slots and actual size, %d vs %d", - kring->nkr_num_slots, rxr->actual_size); - return 1; // XXX error - } - possible_frags = (rxr->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; - RD(1, "stride %d possible frags %d descsize %d DS_SIZE %d", rxr->stride, possible_frags, (int)sizeof(struct mlx4_en_rx_desc), (int)DS_SIZE ); - /* then fill the slots with our entries */ - for (i = 0; i < kring->nkr_num_slots; i++) { - uint64_t paddr; - struct mlx4_en_rx_desc *rx_desc = rxr->buf + (i * rxr->stride); - - PNMB(slot + i, &paddr); - - // see mlx4_en_prepare_rx_desc() and mlx4_en_alloc_frag() - rx_desc->data[0].addr = cpu_to_be64(paddr); - rx_desc->data[0].byte_count = cpu_to_be32(NETMAP_BUF_SIZE); - rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key); - - /* we only use one fragment, so the rest is padding */ - for (j = 1; j < possible_frags; j++) { - rx_desc->data[j].byte_count = 0; - rx_desc->data[j].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); - rx_desc->data[j].addr = 0; - } - } - RD(5, "ring %d done", ring_nr); - return 1; -} - -static int -mlx4_netmap_config(struct netmap_adapter *na, - u_int *txr, u_int *txd, u_int *rxr, u_int *rxd) -{ - struct net_device *ifp = na->ifp; - struct SOFTC_T *priv = netdev_priv(ifp); - - *txr = priv->tx_ring_num; - *txd = priv->tx_ring[0].size; - - - *rxr = priv->rx_ring_num; - if (*txr > *rxr) { - D("using only %d out of %d tx queues", *rxr, *txr); - *txr = *rxr; - } - *rxd = priv->rx_ring[0].size; - D("txr %d txd %d bufsize %d -- rxr %d rxd %d act %d bufsize %d", - *txr, *txd, priv->tx_ring[0].buf_size, - *rxr, *rxd, priv->rx_ring[0].actual_size, - priv->rx_ring[0].buf_size); - return 0; -} - - -/* - * The attach routine, called near the end of mlx4_en_init_netdev(), - * fills the parameters for netmap_attach() and calls it. - * It cannot fail, in the worst case (such as no memory) - * netmap mode will be disabled and the driver will only - * operate in standard mode. - * - * XXX TODO: - * at the moment use a single lock, and only init a max of 4 queues. - */ -static void -mlx4_netmap_attach(struct SOFTC_T *priv) -{ - struct netmap_adapter na; - struct net_device *dev = priv->dev; - int rxq, txq; - - bzero(&na, sizeof(na)); - - na.ifp = dev; - rxq = priv->rx_ring_num; - txq = priv->tx_ring_num; - /* this card has 1k tx queues, so better limit the number */ - if (rxq > 16) - rxq = 16; - if (txq > rxq) - txq = rxq; - if (txq < 1 && rxq < 1) - txq = rxq = 1; - na.num_tx_rings = txq; - na.num_rx_rings = rxq; - na.num_tx_desc = priv->tx_ring[0].size; - na.num_rx_desc = priv->rx_ring[0].size; - na.nm_txsync = mlx4_netmap_txsync; - na.nm_rxsync = mlx4_netmap_rxsync; - na.nm_register = mlx4_netmap_reg; - na.nm_config = mlx4_netmap_config; - netmap_attach(&na); -} -#endif /* NETMAP_MLX4_MAIN */ -/* end of file */ diff --git a/netmap/LINUX/netmap_linux.c b/netmap/LINUX/netmap_linux.c deleted file mode 100644 index 46ddd63..0000000 --- a/netmap/LINUX/netmap_linux.c +++ /dev/null @@ -1,1257 +0,0 @@ -/* - * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "bsd_glue.h" -#include /* fget(int fd) */ - -#include -#include -#include - - -/* #################### VALE OFFLOADINGS SUPPORT ################## */ - -/* Compute and return a raw checksum over (data, len), using 'cur_sum' - * as initial value. Both 'cur_sum' and the return value are in host - * byte order. - */ -rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) -{ - return csum_partial(data, len, cur_sum); -} - -/* Compute an IPv4 header checksum, where 'data' points to the IPv4 header, - * and 'len' is the IPv4 header length. Return value is in network byte - * order. - */ -uint16_t nm_csum_ipv4(struct nm_iphdr *iph) -{ - return ip_compute_csum((void*)iph, sizeof(struct nm_iphdr)); -} - -/* Compute and insert a TCP/UDP checksum over IPv4: 'iph' points to the IPv4 - * header, 'data' points to the TCP/UDP header, 'datalen' is the lenght of - * TCP/UDP header + payload. - */ -void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, - size_t datalen, uint16_t *check) -{ - *check = csum_tcpudp_magic(iph->saddr, iph->daddr, - datalen, iph->protocol, - csum_partial(data, datalen, 0)); -} - -/* Compute and insert a TCP/UDP checksum over IPv6: 'ip6h' points to the IPv6 - * header, 'data' points to the TCP/UDP header, 'datalen' is the lenght of - * TCP/UDP header + payload. - */ -void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, - size_t datalen, uint16_t *check) -{ - *check = csum_ipv6_magic((void *)&ip6h->saddr, (void*)&ip6h->daddr, - datalen, ip6h->nexthdr, - csum_partial(data, datalen, 0)); -} - -uint16_t nm_csum_fold(rawsum_t cur_sum) -{ - return csum_fold(cur_sum); -} - - -/* ####################### MITIGATION SUPPORT ###################### */ - -/* - * The generic driver calls netmap once per received packet. - * This is inefficient so we implement a mitigation mechanism, - * as follows: - * - the first packet on an idle receiver triggers a notification - * and starts a timer; - * - subsequent incoming packets do not cause a notification - * until the timer expires; - * - when the timer expires and there are pending packets, - * a notification is sent up and the timer is restarted. 
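A condensed sketch of the receive-side policy just described (an illustration only, not the handler that follows): the first packet on an idle ring notifies immediately and arms the timer, packets arriving while it runs only set the pending flag, and the expiry handler turns a pending flag into one notification plus a re-armed timer.

static void sketch_mitigated_rx_notify(struct nm_generic_mit *mit)
{
	if (!netmap_mitigation_active(mit)) {
		/* idle receiver: notify userspace now, then arm the timer */
		/* netmap_common_irq(mit->mit_na->ifp, mit->mit_ring_idx, &work_done); */
		netmap_mitigation_start(mit);
	} else {
		/* timer still counting down: just record the pending work */
		mit->mit_pending = 1;
	}
}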
- */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) -int -#else -enum hrtimer_restart -#endif -generic_timer_handler(struct hrtimer *t) -{ - struct nm_generic_mit *mit = - container_of(t, struct nm_generic_mit, mit_timer); - u_int work_done; - - if (!mit->mit_pending) { - return HRTIMER_NORESTART; - } - - /* Some work arrived while the timer was counting down: - * Reset the pending work flag, restart the timer and send - * a notification. - */ - mit->mit_pending = 0; - /* below is a variation of netmap_generic_irq */ - if (mit->mit_na->ifp->if_capenable & IFCAP_NETMAP) { - netmap_common_irq(mit->mit_na->ifp, mit->mit_ring_idx, &work_done); - generic_rate(0, 0, 0, 0, 0, 1); - } - netmap_mitigation_restart(mit); - - return HRTIMER_RESTART; -} - - -void netmap_mitigation_init(struct nm_generic_mit *mit, int idx, - struct netmap_adapter *na) -{ - hrtimer_init(&mit->mit_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - mit->mit_timer.function = &generic_timer_handler; - mit->mit_pending = 0; - mit->mit_ring_idx = idx; - mit->mit_na = na; -} - - -void netmap_mitigation_start(struct nm_generic_mit *mit) -{ - hrtimer_start(&mit->mit_timer, ktime_set(0, netmap_generic_mit), HRTIMER_MODE_REL); -} - -void netmap_mitigation_restart(struct nm_generic_mit *mit) -{ - hrtimer_forward_now(&mit->mit_timer, ktime_set(0, netmap_generic_mit)); -} - -int netmap_mitigation_active(struct nm_generic_mit *mit) -{ - return hrtimer_active(&mit->mit_timer); -} - -void netmap_mitigation_cleanup(struct nm_generic_mit *mit) -{ - hrtimer_cancel(&mit->mit_timer); -} - - - -/* #################### GENERIC ADAPTER SUPPORT ################### */ - -/* - * This handler is registered within the attached net_device - * in the Linux RX subsystem, so that every mbuf passed up by - * the driver can be stolen to the network stack. - * Stolen packets are put in a queue where the - * generic_netmap_rxsync() callback can extract them. - * Packets that comes from netmap_txsync_to_host() are not - * stolen. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38) // not defined before -rx_handler_result_t linux_generic_rx_handler(struct mbuf **pm) -{ - /* If we were called by NM_SEND_UP(), we want to pass the mbuf - to network stack. We detect this situation looking at the - priority field. */ - if ((*pm)->priority == NM_MAGIC_PRIORITY_RX) - return RX_HANDLER_PASS; - - /* When we intercept a sk_buff coming from the driver, it happens that - skb->data points to the IP header, e.g. the ethernet header has - already been pulled. Since we want the netmap rings to contain the - full ethernet header, we push it back, so that the RX ring reader - can see it. */ - skb_push(*pm, 14); - - /* Steal the mbuf and notify the pollers for a new RX packet. */ - generic_rx_handler((*pm)->dev, *pm); - - return RX_HANDLER_CONSUMED; -} -#else /* 2.6.36 .. 2.6.38 */ -struct sk_buff *linux_generic_rx_handler(struct mbuf *m) -{ - generic_rx_handler(m->dev, m); - return NULL; -} -#endif /* 2.6.36..2.6.38 */ - -/* Ask the Linux RX subsystem to intercept (or stop intercepting) - * the packets incoming from the interface attached to 'na'. 
- */ -int -netmap_catch_rx(struct netmap_adapter *na, int intercept) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) // not defined before - return 0; -#else - struct ifnet *ifp = na->ifp; - - if (intercept) { - return -netdev_rx_handler_register(na->ifp, - &linux_generic_rx_handler, na); - } else { - netdev_rx_handler_unregister(ifp); - return 0; - } -#endif -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) -u16 generic_ndo_select_queue(struct ifnet *ifp, struct mbuf *m) -#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) -u16 generic_ndo_select_queue(struct ifnet *ifp, struct mbuf *m, - void *accel_priv) -#else -u16 generic_ndo_select_queue(struct ifnet *ifp, struct mbuf *m, - void *accel_priv, - select_queue_fallback_t fallback) -#endif -{ - return skb_get_queue_mapping(m); // actually 0 on 2.6.23 and before -} - -/* Replacement for the driver ndo_start_xmit() method. - * When this function is invoked because of the dev_queue_xmit() call - * in generic_xmit_frame() (e.g. because of a txsync on the NIC), we have - * to call the original ndo_start_xmit() method. - * In all the other cases (e.g. when the TX request comes from the network - * stack) we intercept the packet and put it into the RX ring associated - * to the host stack. - */ -static netdev_tx_t -generic_ndo_start_xmit(struct mbuf *m, struct ifnet *ifp) -{ - struct netmap_generic_adapter *gna = - (struct netmap_generic_adapter *)NA(ifp); - - if (likely(m->priority == NM_MAGIC_PRIORITY_TX)) - return gna->save_start_xmit(m, ifp); /* To the driver. */ - - /* To a netmap RX ring. */ - return linux_netmap_start_xmit(m, ifp); -} - -/* Must be called under rtnl. */ -void netmap_catch_tx(struct netmap_generic_adapter *gna, int enable) -{ - struct netmap_adapter *na = &gna->up.up; - struct ifnet *ifp = na->ifp; - - if (enable) { - /* - * Save the old pointer to the netdev_ops, - * create an updated netdev ops replacing the - * ndo_select_queue() and ndo_start_xmit() methods - * with our custom ones, and make the driver use it. - */ - na->if_transmit = (void *)ifp->netdev_ops; - /* Save a redundant copy of ndo_start_xmit(). */ - gna->save_start_xmit = ifp->netdev_ops->ndo_start_xmit; - - gna->generic_ndo = *ifp->netdev_ops; /* Copy all */ - gna->generic_ndo.ndo_start_xmit = &generic_ndo_start_xmit; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - printk("%s: no packet steering support\n", __FUNCTION__); -#else - gna->generic_ndo.ndo_select_queue = &generic_ndo_select_queue; -#endif - - ifp->netdev_ops = &gna->generic_ndo; - } else { - /* Restore the original netdev_ops. */ - ifp->netdev_ops = (void *)na->if_transmit; - } -} - -/* Transmit routine used by generic_netmap_txsync(). Returns 0 on success - and -1 on error (which may be packet drops or other errors). */ -int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, - void *addr, u_int len, u_int ring_nr) -{ - netdev_tx_t ret; - - /* Empty the sk_buff. */ - if (unlikely(skb_headroom(m))) - skb_push(m, skb_headroom(m)); - skb_trim(m, 0); - - /* TODO Support the slot flags (NS_MOREFRAG, NS_INDIRECT). */ - skb_copy_to_linear_data(m, addr, len); // skb_store_bits(m, 0, addr, len); - skb_put(m, len); - NM_ATOMIC_INC(&m->users); - m->dev = ifp; - /* Tell generic_ndo_start_xmit() to pass this mbuf to the driver. 
*/ - m->priority = NM_MAGIC_PRIORITY_TX; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24) // XXX - skb_set_queue_mapping(m, ring_nr); -#endif - - ret = dev_queue_xmit(m); - - if (likely(ret == NET_XMIT_SUCCESS)) { - return 0; - } - if (unlikely(ret != NET_XMIT_DROP)) { - /* If something goes wrong in the TX path, there is nothing - intelligent we can do (for now) apart from error reporting. */ - RD(5, "dev_queue_xmit failed: HARD ERROR %d", ret); - } - return -1; -} - -/* Use ethtool to find the current NIC rings lengths, so that the netmap - rings can have the same lengths. */ -int -generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) // XXX - struct ethtool_ringparam rp; - - if (ifp->ethtool_ops && ifp->ethtool_ops->get_ringparam) { - ifp->ethtool_ops->get_ringparam(ifp, &rp); - *tx = rp.tx_pending; - *rx = rp.rx_pending; - } -#endif /* 2.6.31 and above */ - return 0; -} - -/* Fills in the output arguments with the number of hardware TX/RX queues. */ -void -generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) // XXX - *txq = 1; - *rxq = 1; /* TODO ifp->real_num_rx_queues */ -#else - *txq = ifp->real_num_tx_queues; - *rxq = ifp->real_num_rx_queues; -#endif -} - - -/* ######################## FILE OPERATIONS ####################### */ - -struct net_device * -ifunit_ref(const char *name) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) // XXX - return dev_get_by_name(name); -#else - return dev_get_by_name(&init_net, name); -#endif -} - -void if_rele(struct net_device *ifp) -{ - dev_put(ifp); -} - - - -/* - * Remap linux arguments into the FreeBSD call. - * - pwait is the poll table, passed as 'dev'; - * If pwait == NULL someone else already woke up before. We can report - * events but they are filtered upstream. - * If pwait != NULL, then pwait->key contains the list of events. - * - events is computed from pwait as above. - * - file is passed as 'td'; - */ -static u_int -linux_netmap_poll(struct file * file, struct poll_table_struct *pwait) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) // was 28 XXX - int events = POLLIN | POLLOUT; /* XXX maybe... */ -#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) - int events = pwait ? pwait->key : POLLIN | POLLOUT | POLLERR; -#else /* in 3.4.0 field 'key' was renamed to '_key' */ - int events = pwait ? 
pwait->_key : POLLIN | POLLOUT | POLLERR; -#endif - return netmap_poll((void *)pwait, events, (void *)file); -} - - -static int -linux_netmap_mmap(struct file *f, struct vm_area_struct *vma) -{ - int error = 0; - unsigned long off, va; - vm_ooffset_t pa; - struct netmap_priv_d *priv = f->private_data; - /* - * vma->vm_start: start of mapping user address space - * vma->vm_end: end of the mapping user address space - * vma->vm_pfoff: offset of first page in the device - */ - - // XXX security checks - - error = netmap_get_memory(priv); - ND("get_memory returned %d", error); - if (error) - return -error; - - if ((vma->vm_start & ~PAGE_MASK) || (vma->vm_end & ~PAGE_MASK)) { - ND("vm_start = %lx vm_end = %lx", vma->vm_start, vma->vm_end); - return -EINVAL; - } - - for (va = vma->vm_start, off = vma->vm_pgoff; - va < vma->vm_end; - va += PAGE_SIZE, off++) - { - pa = netmap_mem_ofstophys(priv->np_mref, off << PAGE_SHIFT); - if (pa == 0) - return -EINVAL; - - ND("va %lx pa %p", va, pa); - error = remap_pfn_range(vma, va, pa >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot); - if (error) - return error; - } - return 0; -} - - -/* - * This one is probably already protected by the netif lock XXX - */ -netdev_tx_t -linux_netmap_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - netmap_transmit(dev, skb); - return (NETDEV_TX_OK); -} - -/* while in netmap mode, we cannot tolerate any change in the - * number of rx/tx rings and descriptors - */ -int -linux_netmap_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *e) -{ - return -EBUSY; -} - -#ifdef ETHTOOL_SCHANNELS -int -linux_netmap_set_channels(struct net_device *dev, - struct ethtool_channels *e) -{ - return -EBUSY; -} -#endif - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) // XXX was 38 -#define LIN_IOCTL_NAME .ioctl -int -linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */) -#else -#define LIN_IOCTL_NAME .unlocked_ioctl -long -linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */) -#endif -{ - int ret; - struct nmreq nmr; - bzero(&nmr, sizeof(nmr)); - - if (cmd == NIOCTXSYNC || cmd == NIOCRXSYNC) { - data = 0; /* no argument required here */ - } - if (data && copy_from_user(&nmr, (void *)data, sizeof(nmr) ) != 0) - return -EFAULT; - ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file); - if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0) - return -EFAULT; - return -ret; -} - - -static int -linux_netmap_release(struct inode *inode, struct file *file) -{ - (void)inode; /* UNUSED */ - if (file->private_data) - netmap_dtor(file->private_data); - return (0); -} - - -static int -linux_netmap_open(struct inode *inode, struct file *file) -{ - struct netmap_priv_d *priv; - (void)inode; /* UNUSED */ - - priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (priv == NULL) - return -ENOMEM; - - file->private_data = priv; - - return (0); -} - - -static struct file_operations netmap_fops = { - .owner = THIS_MODULE, - .open = linux_netmap_open, - .mmap = linux_netmap_mmap, - LIN_IOCTL_NAME = linux_netmap_ioctl, - .poll = linux_netmap_poll, - .release = linux_netmap_release, -}; - - - -/* ##################### V1000 BACKEND SUPPORT ##################### */ - -/* Private info stored into the memory area pointed by - netmap_adapter.na_private field. */ -struct netmap_backend { - /* The netmap adapter connected to the v1000 backend. 
*/ - struct netmap_adapter *na; - /* The file struct attached to the unique priv_structure - attached to *na. */ - struct file *file; - /* Pointer to the task which owns this v1000 backend, and - so the adapter. */ - struct task_struct *owner; - /* Pointers to callbacks (in *na) that are overridden by - the v1000 backend. */ - void (*saved_nm_dtor)(struct netmap_adapter *); - int (*saved_nm_notify)(struct netmap_adapter *, u_int ring, - enum txrx, int flags); -}; - -/* Callback that overrides na->nm_dtor. */ -static void netmap_backend_nm_dtor(struct netmap_adapter *na) -{ - struct netmap_backend *be = na->na_private; - - if (be) { - /* Restore the netmap adapter callbacks - overridden by the backend. */ - na->nm_dtor = be->saved_nm_dtor; - na->nm_notify = be->saved_nm_notify; - /* Free the backend memory. */ - kfree(be); - na->na_private = NULL; - D("v1000 backend support removed for %p", na); - - } - - /* Call the original destructor, if any. */ - if (na->nm_dtor) - na->nm_dtor(na); -} - -/* Callback that overrides na->nm_notify. */ -static int netmap_backend_nm_notify(struct netmap_adapter *na, - u_int n_ring, enum txrx tx, int flags) -{ - struct netmap_kring *kring; - - ND("called"); - if (tx == NR_TX) { - kring = na->tx_rings + n_ring; - wake_up_interruptible_poll(&kring->si, POLLIN | - POLLRDNORM | POLLRDBAND); - if (na->tx_si_users > 0) - wake_up_interruptible_poll(&na->tx_si, POLLIN | - POLLRDNORM | POLLRDBAND); - } else { - kring = na->rx_rings + n_ring; - wake_up_interruptible_poll(&kring->si, POLLIN | - POLLRDNORM | POLLRDBAND); - if (na->rx_si_users > 0) - wake_up_interruptible_poll(&na->rx_si, POLLIN | - POLLRDNORM | POLLRDBAND); - } - - return 0; -} - -/* Called by an external module (the v1000 frontend) which wants to - attach to the netmap file descriptor fd. Setup the backend (if - necessary) and return a pointer to the backend private structure, - which can be passed back to the backend exposed interface. - If successful, the caller holds a reference to the file struct - associated to 'fd'. -*/ -void *netmap_get_backend(int fd) -{ - struct file *filp = fget(fd); /* fd --> file */ - struct netmap_priv_d *priv; - struct netmap_adapter *na; - struct netmap_backend *be; - int error = 0; - - if (!filp) - return ERR_PTR(-EBADF); - - if (filp->f_op != &netmap_fops) { - error = -EINVAL; - goto err; - } - - /* file --> netmap priv */ - priv = (struct netmap_priv_d *)filp->private_data; - if (!priv) { - error = -EBADF; - goto err; - } - - NMG_LOCK(); - na = priv->np_na; /* netmap priv --> netmap adapter */ - if (na == NULL) { - error = -EBADF; - goto lock_err; - } - - be = (struct netmap_backend *)(na->na_private); - - /* Allow request if the netmap adapter is not already used by - the kernel or the request comes from the owner. */ - if (NETMAP_OWNED_BY_KERN(na) && (!be || be->owner != current)) { - error = -EBUSY; - goto lock_err; - } - - if (!be) { - /* Setup the backend. */ - be = na->na_private = malloc(sizeof(struct netmap_backend), - M_DEVBUF, M_MOWAIT | M_ZERO); - if (!be) { - error = -ENOMEM; - goto lock_err; - } - be->na = na; - be->file = filp; - be->owner = current; /* set the owner */ - - /* Override some callbacks. 
*/ - be->saved_nm_dtor = na->nm_dtor; - be->saved_nm_notify = na->nm_notify; - na->nm_dtor = &netmap_backend_nm_dtor; - na->nm_notify = &netmap_backend_nm_notify; - - D("v1000 backend support created for %p", na); - } - NMG_UNLOCK(); - - return be; - -lock_err: - NMG_UNLOCK(); -err: - fput(filp); - return ERR_PTR(error); -} -EXPORT_SYMBOL(netmap_get_backend); - -struct file* netmap_backend_get_file(void *opaque) -{ - struct netmap_backend *be = opaque; - - return be->file; -} -EXPORT_SYMBOL(netmap_backend_get_file); - -static int netmap_common_sendmsg(struct netmap_adapter *na, struct msghdr *m, - size_t len, unsigned flags) -{ - struct netmap_ring *ring; - struct netmap_kring *kring; - unsigned i, last; - unsigned avail; - unsigned j; - unsigned nm_buf_size; - struct iovec *iov = m->msg_iov; - size_t iovcnt = m->msg_iovlen; - - ND("message_len %d, %p", (int)len, na_sock); - - if (unlikely(na == NULL)) { - RD(5, "Null netmap adapter"); - return len; - } - - /* Grab the netmap ring normally used from userspace. */ - kring = &na->tx_rings[0]; - ring = kring->ring; - nm_buf_size = ring->nr_buf_size; - - i = last = ring->cur; - avail = ring->tail + ring->num_slots - ring->cur; - if (avail >= ring->num_slots) - avail -= ring->num_slots; - - ND("A) cur=%d avail=%d, hwcur=%d, hwtail=%d\n", - i, avail, na->tx_rings[0].nr_hwcur, na->tx_rings[0].nr_hwtail); - if (avail < iovcnt) { - /* Not enough netmap slots. */ - return 0; - } - - for (j=0; jslot[i]); - - ring->slot[i].len = nm_frag_size; - ring->slot[i].flags = NS_MOREFRAG; - if (copy_from_user(dst, iov_frag + offset, nm_frag_size)) { - D("copy_from_user() error"); - } - - last = i; - if (unlikely(++i == ring->num_slots)) - i = 0; - avail--; - - offset += nm_frag_size; - iov_frag_size -= nm_frag_size; - } - } - - ring->slot[last].flags &= ~NS_MOREFRAG; - - ring->cur = i; - - if (!(flags & MSG_MORE)) - kring->nm_sync(kring, 0); - ND("B) cur=%d avail=%d, hwcur=%d, hwtail=%d\n", - i, avail, na->tx_rings[0].nr_hwcur, na->tx_rings[0].nr_hwtail); - - return len; -} - -int netmap_backend_sendmsg(void *opaque, struct msghdr *m, size_t len, unsigned flags) -{ - struct netmap_backend *be = opaque; - - return netmap_common_sendmsg(be->na, m, len, flags); -} -EXPORT_SYMBOL(netmap_backend_sendmsg); - -static inline int netmap_common_peek_head_len(struct netmap_adapter *na) -{ - /* Here we assume to have a virtual port. */ - struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; - struct netmap_kring *kring = &na->rx_rings[0]; - struct netmap_ring *ring = kring->ring; - u_int i; - int ret = 0; - - /* Do the rxsync here. The recvmsg() callback must be - called after the peek_head_len() callback. */ - if (nm_ring_empty(ring)) - kring->nm_sync(kring, NAF_FORCE_READ); - - i = ring->cur; - if (!nm_ring_empty(ring)) { - for(;;) { - ret += ring->slot[i].len; - if (!(ring->slot[i].flags & NS_MOREFRAG)) - break; - if (unlikely(++i == ring->num_slots)) - i = 0; - } - } - ND("peek %d, %d iovecs cur=%d tail=%d, hwcur=%d, hwtail=%d\n", - ret, i + 1 - ring->cur, - ring->cur, ring->tail, be->na->rx_rings[0].nr_hwcur, - be->na->rx_rings[0].nr_hwtail); - - /* The v1000 frontend assumes that the peek_head_len() callback - doesn't count the bytes of the virtio-net-header. 
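The frontend side is not part of this file; a hypothetical consumer of the exported interface would look roughly like the sketch below (only netmap_get_backend(), netmap_backend_peek_head_len(), netmap_backend_recvmsg() and netmap_backend_get_file() are real symbols here, everything else is illustrative, and a real frontend would of course keep 'be' across calls).

	/* 'fd' is a netmap file descriptor handed over by userspace,
	 * 'm' a struct msghdr prepared by the caller */
	static int frontend_rx_once(int fd, struct msghdr *m)
	{
		void *be = netmap_get_backend(fd);
		int len;

		if (IS_ERR(be))
			return PTR_ERR(be);
		len = netmap_backend_peek_head_len(be); /* next packet size, 0 if none */
		if (len > 0)
			len = netmap_backend_recvmsg(be, m, len);
		/* drop the file reference taken by netmap_get_backend() */
		fput(netmap_backend_get_file(be));
		return len;
	}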
*/ - if (likely(ret >= vpna->virt_hdr_len)) { - ret -= vpna->virt_hdr_len; - } - - return ret; -} - -int netmap_backend_peek_head_len(void *opaque) -{ - struct netmap_backend *be = opaque; - - return netmap_common_peek_head_len(be->na); -} -EXPORT_SYMBOL(netmap_backend_peek_head_len); - -static int netmap_common_recvmsg(struct netmap_adapter *na, - struct msghdr *m, size_t len) -{ - struct netmap_ring *ring; - /* netmap variables */ - unsigned i, avail; - bool morefrag; - unsigned nm_frag_size; - unsigned nm_frag_ofs; - uint8_t *src; - /* iovec variables */ - unsigned j; - struct iovec *iov = m->msg_iov; - size_t iovcnt = m->msg_iovlen; - uint8_t *dst; - unsigned iov_frag_size; - unsigned iov_frag_ofs; - /* counters */ - unsigned copy_size; - unsigned copied; - - /* The caller asks for 'len' bytes. */ - ND("recvmsg %d, %p", (int)len, na); - - if (unlikely(na == NULL)) { - RD(5, "Null netmap adapter"); - return len; - } - - /* Total bytes actually copied. */ - copied = 0; - - /* Grab the netmap RX ring normally used from userspace. */ - ring = na->rx_rings[0].ring; - i = ring->cur; - - avail = ring->tail + ring->num_slots - ring->cur; - if (avail >= ring->num_slots) - avail -= ring->num_slots; - - ND("A) cur=%d avail=%d, hwcur=%d, hwtail=%d\n", - i, avail, na->rx_rings[0].nr_hwcur, na->rx_rings[0].nr_hwtail); - - /* Index into the input iovec[]. */ - j = 0; - - /* Spurious call: Do nothing. */ - if (unlikely(avail == 0)) - return 0; - - /* init netmap variables */ - morefrag = (ring->slot[i].flags & NS_MOREFRAG); - nm_frag_ofs = 0; - nm_frag_size = ring->slot[i].len; - src = BDG_NMB(na, &ring->slot[i]); - if (unlikely(++i == ring->num_slots)) - i = 0; - avail--; - - /* init iovec variables */ - iov_frag_ofs = 0; - iov_frag_size = iov[j].iov_len; - dst = iov[j].iov_base; - j++; - - /* Copy from the netmap scatter-gather to the caller - * scatter-gather. - */ - while (copied < len) { - copy_size = min(nm_frag_size, iov_frag_size); - if (unlikely(copy_to_user(dst + iov_frag_ofs, - src + nm_frag_ofs, copy_size))) { - RD(5, "copy_to_user() failed"); - } - nm_frag_ofs += copy_size; - nm_frag_size -= copy_size; - iov_frag_ofs += copy_size; - iov_frag_size -= copy_size; - copied += copy_size; - if (nm_frag_size == 0) { - /* Netmap slot exhausted. If this was the - * last slot, or no more slots ar available, - * we've done. - */ - if (!morefrag || !avail) - break; - morefrag = (ring->slot[i].flags & NS_MOREFRAG); - nm_frag_ofs = 0; - nm_frag_size = ring->slot[i].len; - src = BDG_NMB(na, &ring->slot[i]); - /* Take the next slot. */ - if (unlikely(++i == ring->num_slots)) - i = 0; - avail--; - } - if (iov_frag_size == 0) { - /* The current iovec fragment is exhausted. - * Since we enter here, there must be more - * to read from the netmap slots (otherwise - * we would have exited the loop in the - * above branch). - * If this was the last fragment, it means - * that there is not enough space in the input - * iovec[]. - */ - if (unlikely(j >= iovcnt)) { - break; - } - /* Take the next iovec fragment. 
*/ - iov_frag_ofs = 0; - iov_frag_size = iov[j].iov_len; - dst = iov[j].iov_base; - j++; - } - } - - if (unlikely(!avail && morefrag)) { - RD(5, "Error: ran out of slots, with a pending" - "incomplete packet\n"); - } - - ring->head = ring->cur = i; - - ND("read %d bytes using %d iovecs", copied, j); - ND("B) cur=%d avail=%d, hwcur=%d, hwtail=%d\n", - i, avail, na->rx_rings[0].nr_hwcur, na->rx_rings[0].nr_hwtail); - - return copied; -} - -int netmap_backend_recvmsg(void *opaque, struct msghdr *m, size_t len) -{ - struct netmap_backend *be = opaque; - - return netmap_common_recvmsg(be->na, m, len); -} -EXPORT_SYMBOL(netmap_backend_recvmsg); - - - -/* ######################## SOCKET SUPPORT ######################### */ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) -struct netmap_sock { - struct sock sk; - struct socket sock; - struct socket_wq wq; - void (*saved_nm_dtor)(struct netmap_adapter *); - int (*saved_nm_notify)(struct netmap_adapter *, u_int ring, - enum txrx, int flags); - void *owner; - struct sk_buff *fake_skb; - struct netmap_adapter *na; -}; - -static struct proto netmap_socket_proto = { - .name = "netmap", - .owner = THIS_MODULE, - .obj_size = sizeof(struct netmap_sock), -}; - -static int netmap_socket_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len); -static int netmap_socket_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, int flags); - -static struct proto_ops netmap_socket_ops = { - .sendmsg = netmap_socket_sendmsg, - .recvmsg = netmap_socket_recvmsg, -}; - -static void netmap_sock_write_space(struct sock *sk) -{ - wait_queue_head_t *wqueue; - - if (!sock_writeable(sk) || - !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) { - return; - } - - wqueue = sk_sleep(sk); - if (wqueue && waitqueue_active(wqueue)) { - wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); - } -} - -static void netmap_sock_teardown(struct netmap_adapter *na) -{ - struct netmap_sock *nm_sock = na->na_private; - - if (nm_sock) { - /* Restore the saved destructor. */ - na->nm_dtor = nm_sock->saved_nm_dtor; - na->nm_notify = nm_sock->saved_nm_notify; - - kfree_skb(nm_sock->fake_skb); - - sock_put(&nm_sock->sk); - /* XXX What? - kfree(nm_sock); - sk_release_kernel(&nm_sock->sk); - */ - sk_free(&nm_sock->sk); - na->na_private = NULL; - D("socket support freed for (%p)", na); - } -} - -static void netmap_socket_nm_dtor(struct netmap_adapter *na) -{ - netmap_sock_teardown(na); - /* Call the saved destructor, if any. 
*/ - if (na->nm_dtor) - na->nm_dtor(na); -} - -static int netmap_socket_nm_notify(struct netmap_adapter *na, - u_int n_ring, enum txrx tx, int flags) -{ - struct netmap_kring *kring; - struct netmap_sock *nm_sock; - - D("called"); - nm_sock = (struct netmap_sock *)(na->na_private); - if (likely(nm_sock)) { - struct sk_buff_head* q = &nm_sock->sk.sk_receive_queue; - unsigned long f; - - spin_lock_irqsave(&q->lock, f); - if (!skb_queue_len(q)) { - nm_sock->fake_skb->len = netmap_common_peek_head_len(na); - D("peek %d", nm_sock->fake_skb->len); - if (nm_sock->fake_skb->len) - __skb_queue_tail(q, nm_sock->fake_skb); - } - spin_unlock_irqrestore(&q->lock, f); - } - - if (tx == NR_TX) { - kring = na->tx_rings + n_ring; - wake_up_interruptible_poll(&kring->si, POLLIN | - POLLRDNORM | POLLRDBAND); - if (na->tx_si_users > 0) - wake_up_interruptible_poll(&na->tx_si, POLLIN | - POLLRDNORM | POLLRDBAND); - } else { - kring = na->rx_rings + n_ring; - wake_up_interruptible_poll(&kring->si, POLLIN | - POLLRDNORM | POLLRDBAND); - if (na->rx_si_users > 0) - wake_up_interruptible_poll(&na->rx_si, POLLIN | - POLLRDNORM | POLLRDBAND); - } - - return 0; -} - -static struct netmap_sock *netmap_sock_setup(struct netmap_adapter *na, struct file *filp) -{ - struct netmap_sock *nm_sock; - - na->na_private = nm_sock = (struct netmap_sock *)sk_alloc(&init_net, AF_UNSPEC, - GFP_KERNEL, &netmap_socket_proto); - if (!nm_sock) { - return NULL; - } - - nm_sock->sock.wq = &nm_sock->wq; /* XXX rcu? */ - init_waitqueue_head(&nm_sock->wq.wait); - nm_sock->sock.file = filp; - nm_sock->sock.ops = &netmap_socket_ops; - sock_init_data(&nm_sock->sock, &nm_sock->sk); - nm_sock->sk.sk_write_space = &netmap_sock_write_space; - - /* Create a fake skb. */ - nm_sock->fake_skb = alloc_skb(1800, GFP_ATOMIC); - if (!nm_sock->fake_skb) { - D("fake skbuff allocation failed"); - sk_free(&nm_sock->sk); - na->na_private = NULL; - - return NULL; - } - - sock_hold(&nm_sock->sk); - - /* Set the backpointer to the netmap_adapter parent structure. 
*/ - nm_sock->na = na; - - nm_sock->owner = current; - - nm_sock->saved_nm_dtor = na->nm_dtor; - nm_sock->saved_nm_notify = na->nm_notify; - na->nm_dtor = &netmap_socket_nm_dtor; - na->nm_notify = &netmap_socket_nm_notify; - - D("socket support OK for (%p)", na); - - return nm_sock; -} - -struct socket *get_netmap_socket(int fd) -{ - struct file *filp = fget(fd); - struct netmap_priv_d *priv; - struct netmap_adapter *na; - struct netmap_sock *nm_sock; - - if (!filp) - return ERR_PTR(-EBADF); - - if (filp->f_op != &netmap_fops) - return ERR_PTR(-EINVAL); - - priv = (struct netmap_priv_d *)filp->private_data; - if (!priv) - return ERR_PTR(-EBADF); - - NMG_LOCK(); - na = priv->np_na; - if (na == NULL) { - NMG_UNLOCK(); - return ERR_PTR(-EBADF); - } - - nm_sock = (struct netmap_sock *)(na->na_private); - - if (NETMAP_OWNED_BY_KERN(na) && (!nm_sock || nm_sock->owner != current)) { - NMG_UNLOCK(); - return ERR_PTR(-EBUSY); - } - - if (!nm_sock) - nm_sock = netmap_sock_setup(na, filp); - NMG_UNLOCK(); - - ND("na_private %p, nm_sock %p", na->na_private, nm_sock); - - /* netmap_sock_setup() may fail because of OOM */ - if (!nm_sock) - return ERR_PTR(-ENOMEM); - - return &nm_sock->sock; -} -EXPORT_SYMBOL(get_netmap_socket); - -static int netmap_socket_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) -{ - struct netmap_sock *nm_sock = container_of(sock, struct netmap_sock, sock); - - return netmap_common_sendmsg(nm_sock->na, m, total_len, 0); -} - -static int netmap_socket_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, int flags) -{ - struct netmap_sock *nm_sock = container_of(sock, struct netmap_sock, sock); - struct netmap_adapter *na = nm_sock->na; - int ret = netmap_common_recvmsg(na, m, total_len); - int peek_len; - - /* Update the fake skbuff. */ - peek_len = netmap_common_peek_head_len(na); - if (peek_len) - nm_sock->fake_skb->len = peek_len; - else { - skb_dequeue(&nm_sock->sk.sk_receive_queue); - D("dequeue"); - } - - return ret; -} -#endif /* >= 2.6.35 */ - - -/* ########################## MODULE INIT ######################### */ - -struct miscdevice netmap_cdevsw = { /* same name as FreeBSD */ - MISC_DYNAMIC_MINOR, - "netmap", - &netmap_fops, -}; - - -static int linux_netmap_init(void) -{ - /* Errors have negative values on linux. 
*/ - return -netmap_init(); -} - - -static void linux_netmap_fini(void) -{ - netmap_fini(); -} - - -module_init(linux_netmap_init); -module_exit(linux_netmap_fini); - -/* export certain symbols to other modules */ -EXPORT_SYMBOL(netmap_attach); /* driver attach routines */ -EXPORT_SYMBOL(netmap_detach); /* driver detach routines */ -EXPORT_SYMBOL(nm_txsync_prologue); /* txsync support */ -EXPORT_SYMBOL(nm_rxsync_prologue); /* rxsync support */ -EXPORT_SYMBOL(netmap_ring_reinit); /* ring init on error */ -EXPORT_SYMBOL(netmap_buffer_lut); -EXPORT_SYMBOL(netmap_total_buffers); /* index check */ -EXPORT_SYMBOL(netmap_buffer_base); -EXPORT_SYMBOL(netmap_reset); /* ring init routines */ -EXPORT_SYMBOL(netmap_buf_size); -EXPORT_SYMBOL(netmap_rx_irq); /* default irq handler */ -EXPORT_SYMBOL(netmap_no_pendintr); /* XXX mitigation - should go away */ -#ifdef WITH_VALE -EXPORT_SYMBOL(netmap_bdg_ctl); /* bridge configuration routine */ -EXPORT_SYMBOL(netmap_bdg_learning); /* the default lookup function */ -#endif /* WITH_VALE */ -EXPORT_SYMBOL(netmap_disable_all_rings); -EXPORT_SYMBOL(netmap_enable_all_rings); -EXPORT_SYMBOL(netmap_krings_create); - - -MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/"); -MODULE_DESCRIPTION("The netmap packet I/O framework"); -MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */ diff --git a/netmap/LINUX/netmap_set_adapter.sh b/netmap/LINUX/netmap_set_adapter.sh deleted file mode 100755 index cbc9cc0..0000000 --- a/netmap/LINUX/netmap_set_adapter.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/sh - -#set -x # for debugging - -if [ -z "$NMSRC" ]; then - NMSRC=~/netmap-release -fi -DRIVER="ixgbe" -#IF="eth0" # force an interface - -if [ ! -f ${NMSRC}/LINUX/netmap_lin.ko ]; then - echo "LINUX/netmap_lin.ko missing. Please compile netmap." - exit 1 -fi - -if [ ! -f ${NMSRC}/LINUX/${DRIVER}/${DRIVER}.ko ]; then - echo "LINUX/${DRIVER}/${DRIVER}.ko missing." - echo "Please compile netmap or make sure to have netmap support for ${DRIVER}" - exit 1 -fi - -NMLOADED=$(lsmod | grep netmap_lin | wc -l) -DRVLOADED=$(lsmod | grep "${DRIVER}" | wc -l) - -# Unload the driver -if [ $DRVLOADED != "0" ]; then - sudo rmmod "$DRIVER" -fi - -# Load netmap -if [ $NMLOADED == "0" ]; then - sudo insmod ${NMSRC}/LINUX/netmap_lin.ko -fi - -if [ "$1" == "g" ]; then - # In order to use generic netmap adapter, load the original driver module, that doesn't - # have netmap support - sudo modprobe ${DRIVER} - echo "Generic netmap adapter." -else - # Use the driver modified with netmap support - sudo insmod ${NMSRC}/LINUX/${DRIVER}/${DRIVER}.ko - echo "Native netmap adapter." -fi - -# Wait a bit for interface name changing -sleep 2 - -# Find all interfaces -IFLIST=$(ip link | grep -o "^[0-9]\+: [^:]\+" | awk '{print $2}') -IFLIST=$(echo ${IFLIST}) - -# Find the interface that match the driver $DRIVER -for i in $IFLIST; do - drv=$(sudo ethtool -i $i 2> /dev/null | grep "driver" | awk '{print $2}') - if [ "$drv" == "$DRIVER" ]; then - IF=$i - echo " Found interface \"${IF}\"" - fi -done - -if [ "$IF" == "" ]; then - echo "No interface using ${DRIVER} driver was found." 
- exit 1 -fi - -sudo ip link set ${IF} up - diff --git a/netmap/LINUX/patches b/netmap/LINUX/patches deleted file mode 120000 index 462a6e2..0000000 --- a/netmap/LINUX/patches +++ /dev/null @@ -1 +0,0 @@ -final-patches \ No newline at end of file diff --git a/netmap/LINUX/scripts/help b/netmap/LINUX/scripts/help deleted file mode 100755 index ea5a1d4..0000000 --- a/netmap/LINUX/scripts/help +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -sed -n 's/^## \?//p' $1 | fmt diff --git a/netmap/LINUX/scripts/linux-pktgen.sh b/netmap/LINUX/scripts/linux-pktgen.sh deleted file mode 100755 index 53480b1..0000000 --- a/netmap/LINUX/scripts/linux-pktgen.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/sh - -#set -x - - -function pgset() -{ - local result - - echo $1 > ${PGDEV} - - result=$(cat $PGDEV | fgrep "Result: OK:") - if [ "$result" = "" ]; then - cat $PGDEV | fgrep "Result:" - fi -} - - -##################### Script configuration ###################### -N="$1" # number of TX kthreads minus one -if [ -z "$1" ]; then - N=0 -fi -NCPUS="7" # number of CPUs on your machine minus one -IF="enp1s0f1" # network interface to test -DST_IP="10.216.8.1" # destination IP address -DST_MAC="00:1b:21:80:e7:d9" # destination MAC address -PKT_SIZE="60" # packet size -PKT_COUNT="10000000" # number of packets to send -CLONE_SKB="10000" # how many times a sk_buff is recycled - - -# Load pktgen kernel module -modprobe pktgen - - -# Clean the configuration for all the CPU-kthread (from 0 to ${NCPUS}) -IDX=$(seq 0 1 ${NCPUS}) -for cpu in ${IDX}; do - PGDEV="/proc/net/pktgen/kpktgend_${cpu}" - echo "Removing all devices (${cpu})" - pgset "rem_device_all" -done - -IDX=$(seq 0 1 ${N}) -for cpu in ${IDX}; do - # kthread-device configuration - PGDEV="/proc/net/pktgen/kpktgend_${cpu}" - echo "Configuring $PGDEV" - echo "Adding ${IF}@${cpu}" - pgset "add_device ${IF}@${cpu}" - - # Packets/mode configuration - PGDEV="/proc/net/pktgen/${IF}@${cpu}" - echo "Configuring $PGDEV" - pgset "count ${PKT_COUNT}" - pgset "clone_skb ${CLONE_SKB}" - pgset "pkt_size ${PKT_SIZE}" - pgset "delay 0" - pgset "dst $DST_IP" - pgset "dst_mac $DST_MAC" - pgset "flag QUEUE_MAP_CPU" - - echo "" -done - - -# Run -PGDEV="/proc/net/pktgen/pgctrl" -echo "Running... Ctrl-C to stop" -pgset "start" -echo "Done." - -# Show results -NUMS="" -for cpu in ${IDX}; do - TMP=$(cat /proc/net/pktgen/${IF}@${cpu} | grep -o "[0-9]\+pps" | grep -o "[0-9]\+") - echo "$cpu $TMP" - NUMS="${NUMS} ${TMP}" -done - -echo "Total TX rate: $(echo $NUMS | tr ' ' '+' | bc)" diff --git a/netmap/LINUX/scripts/np b/netmap/LINUX/scripts/np deleted file mode 100755 index c005e1f..0000000 --- a/netmap/LINUX/scripts/np +++ /dev/null @@ -1,428 +0,0 @@ -#!/bin/bash -## Manage linux driver patches for netmap. -## usage (from the dir containing the Makefile): -## -## scripts/np [args...] -## -## where is any of the functions below. -## - -[ -f scripts/conf ] && source scripts/conf - -## The following enviroment variables must be set: -## -## GITDIR: the absolute path of the netmap linux -## git repository, containing all the required netmap-* -## branches. 
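A scripts/conf for a typical setup might then look like the sketch below (the paths are only examples; LINUX_SOURCES and LINUX_CONFIGS are described right after this check):

	# example scripts/conf -- adjust the paths to your environment
	GITDIR=$HOME/netmap-linux             # git repo containing the netmap-* branches
	LINUX_SOURCES=$HOME/np/linux-sources  # extracted linux-x.y.z trees are cached here
	LINUX_CONFIGS=$HOME/np/linux-configs  # config-<version> files, config-all as fallback
	NETMAP_BRANCH=master                  # netmap branch to test against (the default)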
-[ -n "$GITDIR" -a -d "$GITDIR/.git" ] || { - echo "GITDIR not set or not valid" >&2 - exit 1 -} - -NETMAP_BRANCH=${NETMAP_BRANCH:-master} - -function error { - echo "$@" >&2 - exit 1 -} - -function get-params { - local params=$1; shift - err_msg="$PROGNAME $COMMAND $(echo $params| perl -pe 's/\S+/<$&>/g')" - local param - for param in $params; do - [[ -z "$@" ]] && error "$err_msg" - pname=$(echo -n $param | perl -pe 's/\W/_/g') - eval $pname="$1" - shift - done - [[ -n "$@" ]] && error "$err_msg" -} - -## -## LINUX_SOURCES: the absolute path of a -## directory used to store all required linux-* source trees -## (The script will extract linux-x.y.z from GITDIR if it needs -## it and $LINUX_SOURCES does not already contain it). -## -## LINUX_CONFIGS: the absolute path of a -## directory containing the configuration files for -## the linux kernel. The file for version x must be named -## config-x. config-all can be used as a default. -## -## The configuration variables can be put in scripts/conf. -## - -## -## Available actions: -## - -## -## driver-path -## retrieves the path of in the linux sources -## for version . The path is output to stdout. -## It uses a local cache to minimize the expensive -## file system search. -function driver-path() -{ - get-params "driver version" "$@" - - cat cache/$version/$driver/path 2>/dev/null && return - local kern=$(get-kernel $version) - mkdir -p cache/$version/$driver - ( - cd $kern - find drivers/net -name $driver - ) | tee cache/$version/$driver/path -} - - -## -## get-patch [-c] -## extract the netmap patch for the given and the -## given kernel . The patch is stored in tmp-patches -## and the name of the patch is output to stdout. -## If a patch with the same name already exists in tmp-patches -## it is overwritten, unless the -c option is used, -## in which case the existing patch is kept (the patch name is still output). -function get-patch() -{ - local use_cache - [ "$1" = -c ] && { use_cache=1; shift; } - - get-params "driver version" "$@" - - # convert kernel version to fixed notation - local v1=$(scripts/vers $version -c) - # compute next kernel version (in fixed notation) - local v2=$(scripts/vers $version -i -c) - local patchname=diff--$driver--$v1--$v2 - local out=tmp-patches/$patchname - [ -n "$use_cache" -a -s $out ] && { echo $out; return; } - local drvpath=$(driver-path $driver $version) - [ -n "$drvpath" ] || return - local drvdir=$(dirname $drvpath) - ( - cd $GITDIR - git diff --relative=$drvdir v$version..netmap-$version -- $drvpath - ) > $out - # an empty patch means no netmap support for this driver - [ -s $out ] || { rm $out; return 1; } - echo $out - return 0; -} - -## -## get-range -## extracts the netmap patches for the given for -## all the kernel versions from (included) to -## (excluded). All patches are stored in tmp-patches -## and their names are output to stdout. -function get-range() -{ - get-params "driver version1 version2" "$@" - - local v=$version1 - # while version is less than $version2 - while scripts/vers -b $v $version2 -L; do - get-patch $driver $v - # compute next version - v=$(scripts/vers $v -i) - done -} - - -## -## get-src -## copies the original sources of the given , -## from the given kernel to the given -## directory. -## It uses a local cache to minimize the expensive -## checkouts in GITDIR. 
-function get-src() -{ - get-params "driver version dest" "$@" - - local kern=$(get-kernel $version) - local src=$(driver-path $driver $version) - cp -r $kern/$src $dest -} - - -## -## extend -## checks wether the range of applicability of the -## given can be extented to include . -## It returns 0 on success and 1 on failure. -function extend() -{ - get-params "patch version" "$@" - - local _patch=$(realpath $patch) - # extract the driver name from the patch name - local driver=$(scripts/vers $_patch -s -p -p) - local tmpdir1=$(mktemp -d) - local tmpdir2=$(mktemp -d) - trap "rm -rf $tmpdir1 $tmpdir2" 0 - # we get the driver sources for the given and - # we apply two patches separately: - # i) the given ; - # ii) the proper patch from GITDIR. - # We declare to be extendable if - # - it is still applicable AND - # - we obtain the same files from i) and ii) (ignoring whitespace) - get-src $driver $version $tmpdir1 - get-src $driver $version $tmpdir2 - ( - cd $tmpdir1 - patch --no-backup-if-mismatch -p1 < $_patch >/dev/null 2>&1 - ) || return 1 - local patch2=$(get-patch -c $driver $version) - patch2=$(realpath $patch2) - ( - cd $tmpdir2 - patch -p1 < $patch2 >/dev/null 2>&1 - ) # this will certainly apply - diff -qbBr $tmpdir1 $tmpdir2 >/dev/null || return 1 - return 0 -} - -## -## minimize -## tries to minimize the number of patch files for the given -## . It uses the patches currently found in tmp-patches -## and stores the resulting patches in final-patches. -## If final-patches already contained patches for , -## they are deleted first. -function minimize() -{ - get-params "driver" "$@" - - mkdir -p final-patches - local drv=$(basename $driver) - local patches=$(ls tmp-patches/diff--$drv--* 2>/dev/null) - [ -n "$patches" ] || return 1 - # put the patch names in $1, $2, ... - set $patches - rm -f final-patches/diff--$drv--* - # the original patches (in tmp-patches) are ordered by version number. - # We consider one patch in turn (the 'pivot') and try - # to extend its range to cover the range of the next - # patch. If this succedes, the merged patch is the new - # pivot, otherwise the current pivot is output and the - # next patch becomes the new pivot. The process - # is repeated until there are no more patches to consider. - local pivot=$1 - [ -n "$pivot" -a -e "$pivot" ] || return 1 - # extract the left end and right end of the pivot's range - local ple=$(scripts/vers $pivot -s -p -C) - local pre=$(scripts/vers $pivot -s -C) - while [ -n "$pivot" ]; do - shift - if [ -n "$1" ]; then - # extract the left end and right end of the next patch - local nle=$(scripts/vers $1 -s -p -C) - local nre=$(scripts/vers $1 -s -C) - # we admit no gaps in the range - if [ $pre = $nle ] && extend $pivot $nle; then - pre=$nre - continue - fi - fi - # either out of patches or failed merge. - # Compute the file name of the current pivot and store - # the patch in its final location - out=$(scripts/vers diff $drv $ple -c $pre -c -S4) - cp $pivot final-patches/$out - # the new pivot becames the next patch (if any) - pivot=$1 - pre=$nre - ple=$nle - done - return 0 -} - -## -## infty -## if final-patches contains a patch for with a range -## ending in , extend it to infinity. -## Do nothing otherwise. 
-function infty() -{ - get-params "driver version" "$@" - - local drv=$(basename $driver) - # convert kernel version to fixed notation - local v=$(scripts/vers $version -c) - local last=$(ls final-patches/diff--$drv--*--$v 2>/dev/null|tail -n1) - [ -n "$last" ] || return 1 - mv -n $last $(scripts/vers $last -s -p 99999 -S4) 2>/dev/null -} - -function get-kernel() -{ - get-params "version" "$@" - - local dst="$(realpath $LINUX_SOURCES)/linux-$version" - - [ -d $dst ] && { echo $dst; return; } - - mkdir -p $dst - - ( - cd $GITDIR - git archive v$v | tar xf - -C $dst - ) - echo $dst -} - - -## -## build-prep -## prepare the linux tree for to be ready -## for external modules compilation. -## The tree is put in $LINUX_SOURCES/linux- and the -## configuration is obtained from $LINUX_CONFIGS/config- -## (or $LINUX_CONFIGS/config-all by default). -## Errors are logged to $LINUX_CONFIGS/linux-.log. -## If $LINUX_SOURCES/linux- already exists, -## nothing is done. -## In all cases, the absolute path of linux- is -## output. -function build-prep() -{ - get-params "version" "$@" - - local dst=$(get-kernel $version) - - exec 3>&1 4>&2 >$dst.log 2>&1 - cp $LINUX_CONFIGS/config-$v $dst/.config 2>/dev/null || - cp $LINUX_CONFIGS/config-all $dst/.config - ( - cd $dst - yes '' | make oldconfig - make modules_prepare - ) - exec 1>&3 2>&4 - echo $dst -} - -## -## check-patch -## check that the given applies and compiles without -## error for all its declared range of applicability. -## Errors are logged to log/. -function check-patch() -{ - get-params "patch" "$@" - - # extract the left version - local v1=$(scripts/vers $patch -s -p -C) - # extract the right version - local v2=$(scripts/vers $patch -s -C) - # extract the driver name - local driver=$(scripts/vers $patch -s -p -p) - local p=$(realpath $patch) - mkdir -p log - local log="$(realpath log)/$(basename $patch)" - local nmcommit=$(cd ..; git show-ref -s heads/$NETMAP_BRANCH) - - echo -n $patch... - - while scripts/vers -b $v1 $v2 -L; do - # cache lookup - local cache=cache/$v1/$driver - local cpatch=$cache/patch - local cnmcommit=$cache/nmcommit - local cstatus=$cache/status - local clog=$cache/log - if [ -f $cpatch ] && - cmp -s $cpatch $patch && - [ "$nmcommit" = "$(cat $cnmcommit)" ]; then - cp $clog $log - ok=$(cat $cstatus) - else - # update cache - cp $patch $cpatch - echo $nmcommit > $cnmcommit - - local ksrc=$(build-prep $v1) - local tmpdir=$(mktemp -d) - trap "rm -rf $tmpdir" 0 - (cd ..; git archive $NETMAP_BRANCH | tar xf - -C $tmpdir ) - pushd $tmpdir/LINUX >/dev/null - mkdir single-patch - rm patches - ln -s single-patch patches - cp $p single-patch - ok=false - make KSRC=$ksrc >$log 2>&1 && ok=true - popd >/dev/null - cp $log $clog - fi - [ $ok = true ] || { echo FAILED; echo false > $cstatus; return 1; } - echo true > $cstatus - rm -rf $tmpdir - # compute next version - v1=$(scripts/vers $v1 -i) - done - echo OK -} - -## -## build-check -## do a check-patch for all the patches of that are -## currently in tmp-patches. Patches that fail the check -## are moved to failed-patches. -function build-check() -{ - get-params "driver" "$@" - - mkdir -p failed-patches - local drv=$(basename $driver) - local patches=$(ls tmp-patches/diff--$drv--* 2>/dev/null) - local p - for p in $patches; do - check-patch $p || mv $p failed-patches - done -} - -## -## forall [args...] -## exec [args...] for all known drivers. 
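A typical regeneration run combines the commands documented above through the *-all shorthand handled at the bottom of this script; the kernel versions below are only an example and assume scripts/conf is set up:

	scripts/np get-range-all 3.0 3.17   # extract per-version patches into tmp-patches/
	scripts/np build-check-all          # compile-test them, failures go to failed-patches/
	scripts/np minimize-all             # merge adjacent ranges into final-patches/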
-function forall() -{ - local cmd=$1 - shift - # we obtain the value of DRIVER_SRC from the makefile - # (the +% target is defined in our Makefile and prints - # the contents of variable %) - local driver_srcs=$(make +DRIVER_SRCS) - - local driver - for driver in $driver_srcs; do - $cmd $(basename $driver) "$@" - done -} - -mkdir -p tmp-patches - -PROGNAME=$0 - -[ -n "$1" ] || { - scripts/help $PROGNAME; - exit 1 -} - -COMMAND=$1; shift -case $COMMAND in -*-all) - forall ${COMMAND%-all} "$@" - ;; --[hH]|--help|-help|help) - scripts/help $PROGNAME - ;; -*) - $COMMAND "$@" - ;; -esac diff --git a/netmap/LINUX/scripts/vers b/netmap/LINUX/scripts/vers deleted file mode 100755 index 38ba38e..0000000 --- a/netmap/LINUX/scripts/vers +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/perl -## Simple stack-based RPN calculator for linux version numbers. -## Usage: -## -## scripts/vers [operand|operation ...] -## -## Operations all start with '-', everything else is an operand -## and is pushed on the stack as-is. -## When all arguments have been processed, the content of the -## top of the stack is printed on stdout and the script ends. -## -## Available operations: - -sub badversion -{ - my $v = shift; - die "Bad version $v"; -} - -sub conv -{ - my $v = shift; - - return sprintf "%x%02x%02x", (split /\./, $v); -} - - -sub rconv -{ - my $v = shift; - - $v =~ /(.*)(..)(..)$/; - if ($1 > 2 && (hex $3) == 0) { - return sprintf "%d.%d", (hex $1), (hex $2); - } - return sprintf "%d.%d.%d", (hex $1), (hex $2), (hex $3); -} - -sub next -{ - my $v = shift; - my ($may, $min, $sub) = split /\./, $v; - - if ($may < 2 || ($may == 2 && $min != 6)) { - &badversion($v); - } - if ($may == 2) { - if ($sub < 39) { - return "2.6." . ($sub + 1); - } elsif ($sub == 39) { - return "3.0"; - } else { - &badversion($v); - } - } else { - return "$may." . ($min + 1); - } -} - -@ARGV or do { system("scripts/help $0"); exit 1; }; - -for (@ARGV) { -## -## -b (nullary) suppress normal output. On exit, return 1 -## if stack top is "false", 0 otherwise. - /^-b$/ && do { - $silent=1; - next; - }; -## -## -c (unary) convert from dot to fixed notation - /^-c$/ && do { - $v = pop @stack; - push @stack, &conv($v); - next; - }; -## -## -C (unary) convert from fixed to dot notation - /^-C$/ && do { - $v = pop @stack; - push @stack, &rconv($v); - next; - }; -## -## -i (unary) increment version number -## (must be in dot notation) - /^-i$/ && do { - $v = pop @stack; - push @stack, &next($v); - next; - }; -## -## -s (unary) assume the stack top is a -## string containing several fields separated -## by '--'. Replace the stack top with these -## fields (last on top) - /^-s$/ && do { - $v = pop @stack; - push @stack, split /--/, $v; - next; - }; -## -## -SN (N-ary) pop N elements from the stack, -## join them using '--' as a separator -## (top as last) and push the resulting -## string - /^-S(\d+)$/ && do { - $n = $1; - @t = @stack[-$n..-1]; - while ($n--) { - pop @stack; - } - push @stack, (join '--', @t); - next; - }; -## -## -p (unary) pop - /^-p$/ && do { - pop @stack; - next; - }; -## -## -l (binary) push "true" if first version -## number is stricly less then second version -## number (versions in fixed notation) -## -## -L (binary) like -l, but for version numbers -## in dot notation - /^-[lL]$/ && do { - $v1 = pop @stack; - $v2 = pop @stack; - /^-L$/ && do { - $v1 = &conv($v1); - $v2 = &conv($v2); - }; - push @stack, (($v2 lt $v1) ? "true" : "false"); - next; - }; -## -## -a (binary) logical and. 
Arguments must be -## either "true" or "false". - /^-a$/ && do { - $v1 = pop @stack; - $v2 = pop @stack; - push @stack, (($v1 eq "true" && $v2 eq "true") ? "true" : "false"); - next; - }; -## -## -n (unary) logical not. Argument must be -## either "true" or "false". - /^-n$/ && do { - $v1 = pop @stack; - push @stack, (($v1 eq "true") ? "false" : "true"); - next; - }; - push @stack, $_; -} -$v = pop @stack; -if ($silent) { - exit ($v eq "false"); -} -print "$v\n"; diff --git a/netmap/LINUX/virtio_netmap.h b/netmap/LINUX/virtio_netmap.h deleted file mode 100644 index 7825872..0000000 --- a/netmap/LINUX/virtio_netmap.h +++ /dev/null @@ -1,538 +0,0 @@ -/* - * Copyright (C) 2014 Vincenzo Maffione. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include - - -#define SOFTC_T virtnet_info - -static int virtnet_close(struct ifnet *ifp); -static int virtnet_open(struct ifnet *ifp); -static void free_receive_bufs(struct virtnet_info *vi); - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -/* Before 2.6.35 there was no net_device.num_rx_queues, so we assume 1. */ -#define DEV_NUM_RX_QUEUES(_netdev) 1 -/* A scatterlist struct is needed by functions that invoke - virtqueue_add_buf() methods, but before 2.6.35 these struct were - not part of virtio-net data structures, but were defined in those - function. This macro does this definition, which is not necessary - for subsequent versions. */ -#define COMPAT_DECL_SG struct scatterlist _compat_sg; - -#else /* >= 2.6.35 */ - -#define DEV_NUM_RX_QUEUES(_netdev) (_netdev)->num_rx_queues -#define COMPAT_DECL_SG - -#endif /* >= 2.6.35 */ - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -/* Before 2.6.35, the virtio interface was not exported with functions, - but using virtqueue callbacks. 
*/ -#define virtqueue_detach_unused_buf(_vq) \ - (_vq)->vq_ops->detach_unused_buf(_vq) -#define virtqueue_get_buf(_vq, _lp) \ - (_vq)->vq_ops->get_buf(_vq, _lp) -#define virtqueue_add_inbuf(_vq, _sg, _num, _tok, _gfp) \ - (_vq)->vq_ops->add_buf(_vq, _sg, 0, _num, _tok) -#define virtqueue_add_outbuf(_vq, _sg, _num, _tok, _gfp) \ - (_vq)->vq_ops->add_buf(_vq, _sg, _num, 0, _tok) -#define virtqueue_kick(_vq) \ - (_vq)->vq_ops->kick(_vq) -#define virtqueue_enable_cb(_vq) \ - (_vq)->vq_ops->enable_cb(_vq) - -#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) -/* Some simple renaming due to virtio interface changes. */ -#define virtqueue_add_inbuf(_vq, _sg, _num, _tok, _gfp) \ - virtqueue_add_buf_gfp(_vq, _sg, 0, _num, _tok, _gfp) -#define virtqueue_add_outbuf(_vq, _sg, _num, _tok, _gfp) \ - virtqueue_add_buf_gfp(_vq, _sg, _num, 0, _tok, _gfp) - -#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -/* Some simple renaming due to virtio interface changes. */ -#define virtqueue_add_inbuf(_vq, _sg, _num, _tok, _gfp) \ - virtqueue_add_buf(_vq, _sg, 0, _num, _tok, _gfp) -#define virtqueue_add_outbuf(_vq, _sg, _num, _tok, _gfp) \ - virtqueue_add_buf(_vq, _sg, _num, 0, _tok, _gfp) - -#endif /* 3.3 <= VER < 3.10.0 */ - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) -/* The delayed optimization did not exists before version 3.0. */ -#define virtqueue_enable_cb_delayed(_vq) virtqueue_enable_cb(_vq) -#endif /* < 3.0 */ - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 2, 0) -/* Not yet found a way to find out virtqueue length in these - kernel series. Use the virtio default value. */ -#define virtqueue_get_vring_size(_vq) 256 -#endif /* < 3.2 */ - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) -/* Before 3.8.0 virtio did not have multiple queues, and therefore - it did not have per-queue data structures. We then abstract the - way data structure are accessed, ignoring the queue indexes. */ -#define DECR_NUM(_vi, _i) --(_vi)->num -#define GET_RX_VQ(_vi, _i) (_vi)->rvq -#define GET_TX_VQ(_vi, _i) (_vi)->svq -#define VQ_FULL(_vq, _err) (_err > 0) - -static void give_pages(struct SOFTC_T *vi, struct page *page); -static struct page *get_a_page(struct SOFTC_T *vi, gfp_t gfp_mask); -#define GIVE_PAGES(_vi, _i, _buf) give_pages(_vi, _buf) - -/* This function did not exists, there was just the code. */ -static void free_receive_bufs(struct SOFTC_T *vi) -{ - while (vi->pages) - __free_pages(get_a_page(vi, GFP_KERNEL), 0); -} - -#else /* >= 3.8.0 */ - -static void give_pages(struct receive_queue *rq, struct page *page); -#define GIVE_PAGES(_vi, _i, _buf) give_pages(&(_vi)->rq[_i], _buf) -#define DECR_NUM(_vi, _i) --(_vi)->rq[_i].num -#define GET_RX_VQ(_vi, _i) (_vi)->rq[_i].vq -#define GET_TX_VQ(_vi, _i) (_vi)->sq[_i].vq -#define VQ_FULL(_vq, _err) ((_vq)->num_free == 0) - -#endif /* >= 3.8.0 */ - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -/* Use the scatterlist struct defined in the current function - (see above). */ -#define GET_RX_SG(_vi, _i) &_compat_sg -#define GET_TX_SG(_vi, _i) &_compat_sg - -#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) -/* Also here we create an abstraction because of multiqueue support - (see above). */ -#define GET_RX_SG(_vi, _i) (_vi)->rx_sg -#define GET_TX_SG(_vi, _i) (_vi)->tx_sg - -#else /* >= 3.8.0 */ - -#define GET_RX_SG(_vi, _i) (_vi)->rq[_i].sg -#define GET_TX_SG(_vi, _i) (_vi)->sq[_i].sg - -#endif /* >= 3.8.0 */ - - -/* Free all the unused buffer in all the RX virtqueues. - * This function is called when entering and exiting netmap mode. 
- * In the former case, the unused buffers point to memory allocated by - * the virtio-driver (e.g. sk_buffs). We need to free that - * memory, otherwise we have leakage. - * In the latter case, the unused buffers point to memory allocated by - * netmap, and so we don't need to free anything. - * We scan all the RX virtqueues, even those that have not been - * activated (by 'ethtool --set-channels eth0 combined $N'). - */ -static void virtio_netmap_free_rx_unused_bufs(struct SOFTC_T* vi, int onoff) -{ - void *buf; - int i, c; - - for (i = 0; i < DEV_NUM_RX_QUEUES(vi->dev); i++) { - struct virtqueue *vq = GET_RX_VQ(vi, i); - - c = 0; - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (onoff) { - if (vi->mergeable_rx_bufs || vi->big_packets) - GIVE_PAGES(vi, i, buf); - else - dev_kfree_skb(buf); - } - DECR_NUM(vi, i); - c++; - } - D("[%d] freed %d rx unused bufs on queue %d", onoff, c, i); - } -} - -/* Register and unregister. */ -static int -virtio_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *vi = netdev_priv(ifp); - struct netmap_hw_adapter *hwna = (struct netmap_hw_adapter*)na; - int error = 0; - - if (na == NULL) - return EINVAL; - - /* It's important to deny the registration if the interface is - not up, otherwise the virtnet_close() is not matched by a - virtnet_open(), and so a napi_disable() is not matched by - a napi_enable(), which results in a deadlock. */ - if (!netif_running(ifp)) - return EBUSY; - - rtnl_lock(); - - /* Down the interface. This also disables napi. */ - virtnet_close(ifp); - - if (onoff) { - /* We have to drain the RX virtqueues, otherwise the - * virtio_netmap_init_buffer() called by the subsequent - * virtnet_open() cannot link the netmap buffers to the - * virtio RX ring. */ - virtio_netmap_free_rx_unused_bufs(vi, onoff); - /* Also free the pages allocated by the driver. */ - free_receive_bufs(vi); - - /* enable netmap mode */ - ifp->if_capenable |= IFCAP_NETMAP; - na->na_flags |= NAF_NATIVE_ON; - na->if_transmit = (void *)ifp->netdev_ops; - ifp->netdev_ops = &hwna->nm_ndo; - } else { - ifp->if_capenable &= ~IFCAP_NETMAP; - na->na_flags &= ~NAF_NATIVE_ON; - ifp->netdev_ops = (void *)na->if_transmit; - - /* Drain the RX virtqueues, otherwise the driver will - * interpret the netmap buffers currently linked to the - * netmap ring as buffers allocated by the driver. This - * would break the driver (and kernel panic/ooops). */ - virtio_netmap_free_rx_unused_bufs(vi, onoff); - } - - /* Up the interface. This also enables the napi. */ - virtnet_open(ifp); - - rtnl_unlock(); - - return (error); -} - - -/* Reconcile kernel and user view of the transmit ring. */ -static int -virtio_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - - /* device-specific */ - COMPAT_DECL_SG - struct SOFTC_T *vi = netdev_priv(ifp); - struct virtqueue *vq = GET_TX_VQ(vi, ring_nr); - struct scatterlist *sg = GET_TX_SG(vi, ring_nr); - struct netmap_adapter *token; - - // XXX invert the order - /* Free used slots. We only consider our own used buffers, recognized - * by the token we passed to virtqueue_add_outbuf. 
- */ - n = 0; - for (;;) { - token = virtqueue_get_buf(vq, &nic_i); /* dummy 2nd arg */ - if (token == NULL) - break; - if (likely(token == na)) - n++; - } - kring->nr_hwtail += n; - if (kring->nr_hwtail > lim) - kring->nr_hwtail -= lim + 1; - - /* - * First part: process new packets to send. - */ - rmb(); - - if (!netif_carrier_ok(ifp)) { - /* All the new slots are now unavailable. */ - goto out; - } - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - void *addr = NMB(slot); - int err; - - NM_CHECK_ADDR_LEN(addr, len); - - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - /* Initialize the scatterlist, expose it to the hypervisor, - * and kick the hypervisor (if necessary). - */ - sg_set_buf(sg, addr, len); - err = virtqueue_add_outbuf(vq, sg, 1, na, GFP_ATOMIC); - if (err < 0) { - D("virtqueue_add_outbuf failed"); - break; - } - virtqueue_kick(vq); - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - /* Update hwcur depending on where we stopped. */ - kring->nr_hwcur = nm_i; /* note we migth break early */ - - /* No more free TX slots? Ask the hypervisor for notifications, - * possibly only when a considerable amount of work has been - * done. - */ - if (nm_kr_txempty(kring)) - virtqueue_enable_cb_delayed(vq); - } -out: - nm_txsync_finalize(kring); - - return 0; -} - - -/* Reconcile kernel and user view of the receive ring. */ -static int -virtio_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int ring_nr = kring->ring_id; - u_int nm_i; /* index into the netmap ring */ - // u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - COMPAT_DECL_SG - struct SOFTC_T *vi = netdev_priv(ifp); - struct virtqueue *vq = GET_RX_VQ(vi, ring_nr); - struct scatterlist *sg = GET_RX_SG(vi, ring_nr); - - /* XXX netif_carrier_ok ? */ - - if (head > lim) - return netmap_ring_reinit(kring); - - rmb(); - /* - * First part: import newly received packets. - * Only accept our - * own buffers (matching the token). We should only get - * matching buffers, because of virtio_netmap_free_rx_unused_bufs() - * and virtio_netmap_init_buffers(). - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - struct netmap_adapter *token; - - nm_i = kring->nr_hwtail; - n = 0; - for (;;) { - int len; - token = virtqueue_get_buf(vq, &len); - if (token == NULL) - break; - if (likely(token == na)) { - ring->slot[nm_i].len = len; - ring->slot[nm_i].flags = slot_flags; - nm_i = nm_next(nm_i, lim); - n++; - } else { - D("This should not happen"); - } - } - kring->nr_hwtail = nm_i; - kring->nr_kflags &= ~NKR_PENDINTR; - } - ND("[B] h %d c %d hwcur %d hwtail %d", - ring->head, ring->cur, kring->nr_hwcur, - kring->nr_hwtail); - - /* - * Second part: skip past packets that userspace has released. 
- */ - nm_i = kring->nr_hwcur; /* netmap ring index */ - if (nm_i != head) { - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - void *addr = NMB(slot); - int err; - - if (addr == netmap_buffer_base) /* bad buf */ - return netmap_ring_reinit(kring); - - slot->flags &= ~NS_BUF_CHANGED; - - /* Initialize the scatterlist, expose it to the hypervisor, - * and kick the hypervisor (if necessary). - */ - sg_set_buf(sg, addr, ring->nr_buf_size); - err = virtqueue_add_inbuf(vq, sg, 1, na, GFP_ATOMIC); - if (err < 0) { - D("virtqueue_add_inbuf failed"); - return err; - } - virtqueue_kick(vq); - nm_i = nm_next(nm_i, lim); - } - kring->nr_hwcur = head; - } - - /* We have finished processing used RX buffers, so we have to tell - * the hypervisor to make a call when more used RX buffers will be - * ready. - */ - virtqueue_enable_cb(vq); - - /* tell userspace that there might be new packets. */ - nm_rxsync_finalize(kring); - - ND("[C] h %d c %d t %d hwcur %d hwtail %d", - ring->head, ring->cur, ring->tail, - kring->nr_hwcur, kring->nr_hwtail); - - return 0; -} - - -/* Make RX virtqueues buffers pointing to netmap buffers. */ -static int virtio_netmap_init_buffers(struct SOFTC_T *vi) -{ - struct ifnet *ifp = vi->dev; - struct netmap_adapter* na = NA(ifp); - unsigned int r; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - for (r = 0; r < na->num_rx_rings; r++) { - COMPAT_DECL_SG - struct netmap_ring *ring = na->rx_rings[r].ring; - struct virtqueue *vq = GET_RX_VQ(vi, r); - struct scatterlist *sg = GET_RX_SG(vi, r); - struct netmap_slot* slot; - unsigned int i; - int err = 0; - - slot = netmap_reset(na, NR_RX, r, 0); - if (!slot) { - D("strange, null netmap ring %d", r); - return 0; - } - - /* Add up to na>-num_rx_desc-1 buffers to this RX virtqueue. - * It's important to leave one virtqueue slot free, otherwise - * we can run into ring->cur/ring->tail wraparounds. - */ - for (i = 0; i < na->num_rx_desc-1; i++) { - void *addr; - - slot = &ring->slot[i]; - addr = NMB(slot); - sg_set_buf(sg, addr, ring->nr_buf_size); - err = virtqueue_add_inbuf(vq, sg, 1, na, GFP_ATOMIC); - if (err < 0) { - D("virtqueue_add_inbuf failed"); - - return 0; - } - if (VQ_FULL(vq, err)) - break; - } - D("added %d inbufs on queue %d", i, r); - virtqueue_kick(vq); - } - - return 1; -} - -/* Update the virtio-net device configurations. Number of queues can - * change dinamically, by 'ethtool --set-channels $IFNAME combined $N'. - * This is actually the only way virtio-net can currently enable - * the multiqueue mode. 
- */ -static int -virtio_netmap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, - u_int *rxr, u_int *rxd) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *vi = netdev_priv(ifp); - - *txr = ifp->real_num_tx_queues; - *txd = virtqueue_get_vring_size(GET_TX_VQ(vi, 0)); - *rxr = 1; - *rxd = virtqueue_get_vring_size(GET_RX_VQ(vi, 0)); - D("virtio config txq=%d, txd=%d rxq=%d, rxd=%d", - *txr, *txd, *rxr, *rxd); - - return 0; -} - -static void -virtio_netmap_attach(struct SOFTC_T *vi) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = vi->dev; - na.num_tx_desc = virtqueue_get_vring_size(GET_TX_VQ(vi, 0)); - na.num_rx_desc = virtqueue_get_vring_size(GET_RX_VQ(vi, 0)); - na.nm_register = virtio_netmap_reg; - na.nm_txsync = virtio_netmap_txsync; - na.nm_rxsync = virtio_netmap_rxsync; - na.nm_config = virtio_netmap_config; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na); - - D("virtio attached txq=%d, txd=%d rxq=%d, rxd=%d", - na.num_tx_rings, na.num_tx_desc, - na.num_tx_rings, na.num_rx_desc); -} -/* end of file */ diff --git a/netmap/LINUX/wip-patches/diff--mellanox--30300--30800 b/netmap/LINUX/wip-patches/diff--mellanox--30300--30800 deleted file mode 100644 index 6fc5ddf..0000000 --- a/netmap/LINUX/wip-patches/diff--mellanox--30300--30800 +++ /dev/null @@ -1,145 +0,0 @@ -diff -urp --exclude '*.o' --exclude '*.cmd' --exclude '*mod.c' drivers/net/ethernet/mellanox/mlx4/en_netdev.c ./mellanox/mlx4/en_netdev.c ---- drivers/net/ethernet/mellanox/mlx4/en_netdev.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mellanox/mlx4/en_netdev.c 2012-09-27 00:05:22.703523430 -0700 -@@ -48,6 +48,39 @@ - #include "mlx4_en.h" - #include "en_port.h" - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * This driver is split in multiple small files. -+ * The main device descriptor has type struct mlx4_en_priv *priv; -+ * and we attach to the device in mlx4_en_init_netdev() -+ * (do port numbers start from 1 ?) -+ * -+ * The reconfig routine is in mlx4_en_start_port() (also here) -+ * which is called on a mlx4_en_restart() (watchdog), open and set-mtu. -+ * -+ * priv->num_frags ?? -+ * DS_SIZE ?? -+ * apparently each rx desc is followed by frag.descriptors -+ * and the rx desc is rounded up to a power of 2. -+ * -+ * Receive code is in en_rx.c -+ * priv->rx_ring_num number of rx rings -+ * rxr = prov->rx_ring[ring_ind] rx ring descriptor -+ * rxr->size number of slots -+ * rxr->prod producer -+ * probably written into a mmio reg at *rxr->wqres.db.db -+ * trimmed to 16 bits. 
-+ * -+ * Rx init routine: -+ * mlx4_en_activate_rx_rings() -+ * mlx4_en_init_rx_desc() -+ * Transmit code is in en_tx.c -+ */ -+ -+#define NETMAP_MLX4_MAIN -+#include /* extern stuff */ -+#endif /* CONFIG_NETMAP */ -+ - int mlx4_en_setup_tc(struct net_device *dev, u8 up) - { - if (up != MLX4_EN_NUM_UP) -@@ -1042,6 +1075,9 @@ int mlx4_en_start_port(struct net_device - /* Set initial ownership of all Tx TXBBs to SW (1) */ - for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) - *((u32 *) (tx_ring->buf + j)) = 0xffffffff; -+#ifdef DEV_NETMAP -+ mlx4_netmap_tx_config(priv, i); -+#endif /* DEV_NETMAP */ - ++tx_index; - } - -@@ -1639,6 +1675,9 @@ int mlx4_en_init_netdev(struct mlx4_en_d - en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); - - queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); -+#ifdef DEV_NETMAP -+ mlx4_netmap_attach(priv); -+#endif /* DEV_NETMAP */ - return 0; - - out: ---- drivers/net/ethernet/mellanox/mlx4/en_rx.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mellanox/mlx4/en_rx.c 2012-09-27 00:13:16.099550954 -0700 -@@ -41,6 +41,9 @@ - - #include "mlx4_en.h" - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif /* !DEV_NETMAP */ - - static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, -@@ -365,9 +368,16 @@ int mlx4_en_activate_rx_rings(struct mlx - ring = &priv->rx_ring[ring_ind]; - - ring->size_mask = ring->actual_size - 1; -+#ifdef DEV_NETMAP -+ if (priv->dev->if_capenable & IFCAP_NETMAP) { -+ int saved_cons = ring->cons; -+ mlx4_en_free_rx_buf(priv, ring); -+ ring->cons = saved_cons; -+ mlx4_netmap_rx_config(priv, ring_ind); -+ } -+#endif /* DEV_NETMAP */ - mlx4_en_update_rx_prod_db(ring); - } -- - return 0; - - err_buffers: -@@ -402,6 +412,11 @@ void mlx4_en_destroy_rx_ring(struct mlx4 - void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) - { -+#ifdef DEV_NETMAP -+ if (priv->dev->if_capenable & IFCAP_NETMAP) -+ ND("netmap mode, rx buf already freed"); -+ else -+#endif /* DEV_NETMAP */ - mlx4_en_free_rx_buf(priv, ring); - if (ring->stride <= TXBB_SIZE) - ring->buf -= TXBB_SIZE; -@@ -718,6 +739,11 @@ int mlx4_en_poll_rx_cq(struct napi_struc - struct mlx4_en_priv *priv = netdev_priv(dev); - int done; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(cq->dev, cq->ring, &done)) { -+ ND("rx_irq %d for netmap, budget %d done %d", cq->ring, budget, done); -+ } else -+#endif /* DEV_NETMAP */ - done = mlx4_en_process_rx_cq(dev, cq, budget); - - /* If we used up all the quota - we're probably not done yet... */ ---- drivers/net/ethernet/mellanox/mlx4/en_tx.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mellanox/mlx4/en_tx.c 2012-09-27 00:05:22.713523348 -0700 -@@ -55,6 +55,10 @@ MODULE_PARM_DESC(inline_thold, "threshol - - static u32 hashrnd __read_mostly; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include /* extern stuff */ -+#endif /* CONFIG_NETMAP */ -+ - int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, u32 size, - u16 stride) -@@ -396,6 +400,13 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq) - - if (!spin_trylock(&ring->comp_lock)) - return; -+#ifdef DEV_NETMAP -+ /* XXX should be integrated with appropriate lock_wrapper manner? 
*/ -+ if (netmap_tx_irq(cq->dev, cq->ring)) { -+ ND(5, "wakeup queue %d", cq->ring); -+ spin_unlock(&ring->comp_lock); -+ return; -+ } -+#endif /* DEV_NETMAP */ - mlx4_en_process_tx_cq(cq->dev, cq); - mod_timer(&cq->timer, jiffies + 1); - spin_unlock(&ring->comp_lock); diff --git a/netmap/LINUX/wip-patches/diff--mlx4--20630--30200 b/netmap/LINUX/wip-patches/diff--mlx4--20630--30200 deleted file mode 100644 index e0cce6a..0000000 --- a/netmap/LINUX/wip-patches/diff--mlx4--20630--30200 +++ /dev/null @@ -1,163 +0,0 @@ -diff -urp --exclude '*.o' --exclude '*.cmd' --exclude '*mod.c' drivers/net/ethernet/mellanox/mlx4/en_netdev.c ./mellanox/mlx4/en_netdev.c ---- drivers/net/ethernet/mellanox/mlx4/en_netdev.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mlx4/en_netdev.c 2012-09-27 00:05:22.703523430 -0700 -@@ -48,6 +48,39 @@ - #include "mlx4_en.h" - #include "en_port.h" - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * This driver is split in multiple small files. -+ * The main device descriptor has type struct mlx4_en_priv *priv; -+ * and we attach to the device in mlx4_en_init_netdev() -+ * (do port numbers start from 1 ?) -+ * -+ * The reconfig routine is in mlx4_en_start_port() (also here) -+ * which is called on a mlx4_en_restart() (watchdog), open and set-mtu. -+ * -+ * priv->num_frags ?? -+ * DS_SIZE ?? -+ * apparently each rx desc is followed by frag.descriptors -+ * and the rx desc is rounded up to a power of 2. -+ * -+ * Receive code is in en_rx.c -+ * priv->rx_ring_num number of rx rings -+ * rxr = prov->rx_ring[ring_ind] rx ring descriptor -+ * rxr->size number of slots -+ * rxr->prod producer -+ * probably written into a mmio reg at *rxr->wqres.db.db -+ * trimmed to 16 bits. -+ * -+ * Rx init routine: -+ * mlx4_en_activate_rx_rings() -+ * mlx4_en_init_rx_desc() -+ * Transmit code is in en_tx.c -+ */ -+ -+#define NETMAP_MLX4_MAIN -+#include /* extern stuff */ -+#endif /* CONFIG_NETMAP */ -+ - int mlx4_en_setup_tc(struct net_device *dev, u8 up) - { - if (up != MLX4_EN_NUM_UP) -@@ -1042,6 +1075,9 @@ int mlx4_en_start_port(struct net_device - /* Set initial ownership of all Tx TXBBs to SW (1) */ - for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) - *((u32 *) (tx_ring->buf + j)) = 0xffffffff; -+#ifdef DEV_NETMAP -+ mlx4_netmap_tx_config(priv, i); -+#endif /* DEV_NETMAP */ - ++tx_index; - } - -@@ -1639,6 +1675,9 @@ int mlx4_en_init_netdev(struct mlx4_en_d - en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); - - queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); -+#ifdef DEV_NETMAP -+ mlx4_netmap_attach(priv); -+#endif /* DEV_NETMAP */ - return 0; - - out: ---- drivers/net/ethernet/mellanox/mlx4/en_rx.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mlx4/en_rx.c 2012-09-27 00:13:16.099550954 -0700 -@@ -41,6 +41,9 @@ - - #include "mlx4_en.h" - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif /* !DEV_NETMAP */ - - static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, -@@ -365,9 +368,16 @@ int mlx4_en_activate_rx_rings(struct mlx - ring = &priv->rx_ring[ring_ind]; - - ring->size_mask = ring->actual_size - 1; -+#ifdef DEV_NETMAP -+ if (priv->dev->if_capenable & IFCAP_NETMAP) { -+ int saved_cons = ring->cons; -+ mlx4_en_free_rx_buf(priv, ring); -+ ring->cons = saved_cons; -+ mlx4_netmap_rx_config(priv, ring_ind); -+ } -+#endif /* DEV_NETMAP */ - mlx4_en_update_rx_prod_db(ring); - } -- - return 0; - - err_buffers: -@@ -402,6 +412,11 @@ void mlx4_en_destroy_rx_ring(struct mlx4 - 
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) - { -+#ifdef DEV_NETMAP -+ if (priv->dev->if_capenable & IFCAP_NETMAP) -+ ND("netmap mode, rx buf already freed"); -+ else -+#endif /* DEV_NETMAP */ - mlx4_en_free_rx_buf(priv, ring); - if (ring->stride <= TXBB_SIZE) - ring->buf -= TXBB_SIZE; -@@ -692,6 +707,12 @@ out: - wmb(); /* ensure HW sees CQ consumer before we post new buffers */ - ring->cons = mcq->cons_index; - ring->prod += polled; /* Polled descriptors were realocated in place */ -+ -+ ND(5, "set_ci %d 0x%p val %d prod_db 0x%p val %d", -+ cq->ring, -+ mcq->set_ci_db, mcq->cons_index & 0xffffff, -+ ring->wqres.db.db, ring->prod & 0xffff); -+ - mlx4_en_update_rx_prod_db(ring); - ring->csum_ok += csum_ok; - ring->csum_none += csum_none; -@@ -718,6 +739,13 @@ int mlx4_en_poll_rx_cq(struct napi_struc - struct mlx4_en_priv *priv = netdev_priv(dev); - int done; - -+#ifdef DEV_NETMAP -+ static int cnt = 0; -+ ND(5,"XXXXXX-------XXXXXXXXXXX-------- poll-rx-cq %d count %d", (int)cq->ring, cnt++); -+ if (netmap_rx_irq(cq->dev, cq->ring, &done)) { -+ ND("rx_irq %d for netmap, budget %d done %d", cq->ring, budget, done); -+ } else -+#endif /* DEV_NETMAP */ - done = mlx4_en_process_rx_cq(dev, cq, budget); - - /* If we used up all the quota - we're probably not done yet... */ ---- drivers/net/ethernet/mellanox/mlx4/en_tx.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mlx4/en_tx.c 2012-09-27 00:05:22.713523348 -0700 -@@ -55,6 +55,10 @@ MODULE_PARM_DESC(inline_thold, "threshol - - static u32 hashrnd __read_mostly; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include /* extern stuff */ -+#endif /* CONFIG_NETMAP */ -+ - int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, u32 size, - u16 stride) -@@ -396,6 +400,17 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq) - - if (!spin_trylock(&ring->comp_lock)) - return; -+#ifdef DEV_NETMAP // XXX unlock and return should be in the 'if' branch -+ static int cnt = 0; -+ ND(5,"XXXXXX-------XXXXXXXXXXX-------- tx-irq %d count %d", (int)cq->ring, cnt++); -+ if (netmap_tx_irq(cq->dev, cq->ring)) { -+ ND(5, "wakeup queue %d", cq->ring); -+ } else { -+ RD(5, "XXXXXXXXX tx_irq %d unexpected, ignoring", cq->ring); -+ } -+ spin_unlock(&ring->comp_lock); -+ return; -+#endif /* DEV_NETMAP */ - mlx4_en_process_tx_cq(cq->dev, cq); - mod_timer(&cq->timer, jiffies + 1); - spin_unlock(&ring->comp_lock); diff --git a/netmap/Makefile b/netmap/Makefile deleted file mode 100644 index ec5dab5..0000000 --- a/netmap/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# $Id$ -# targets to build tarballs and diffs - -# build a distribution - -RELEASE_SRCS := ./sys/net ./sys/dev ./sys/modules ./examples -RELEASE_SRCS += ./README* ./LINUX ./OSX -RELEASE_EXCL := --exclude .svn --exclude examples/testmod -RELEASE_EXCL += --exclude connlib\* -RELEASE_EXCL += --exclude if_epair.diff -#RELEASE_EXCL += --exclude \*-patches -RELEASE_EXCL += --exclude \*bnx2x\* --exclude \*mellanox\* --exclude \*mlx4\* -RELEASE_EXCL += --exclude OSX - -all: - @echo "What do you want to do ?" 
- - -diff-head: - (cd ~/FreeBSD/head ; \ - svn diff sys/conf sys/dev sbin/ifconfig ) > head-netmap.diff - -# XXX remember to patch sbin/ifconfig if not done yet -diff-r8: - (cd ~/FreeBSD/RELENG_8 ; \ - svn diff sys/conf sys/dev sbin/ifconfig ) > r8-netmap.diff - -release: - D=`date +%Y%m%d` && tar cvzf /tmp/$${D}-netmap.tgz \ - -s'/^./netmap-release/' $(RELEASE_EXCL) $(RELEASE_SRCS) diff --git a/netmap/PORTING b/netmap/PORTING deleted file mode 100644 index 6ad13a7..0000000 --- a/netmap/PORTING +++ /dev/null @@ -1,131 +0,0 @@ -# $Id$ - -Adding netmap support to network device drivers ------------------------------------------------- - -Netmap requires some small modifications to device drivers -to support the new API. You will need to add small patches -in 3-4 places in the original source, and implement typically -5 new functions. - -Device driver patches ------------------------- -+ in the initial part of the source, after the device-specific - headers and prototypes have been declared, add the following -
-	+#if defined(DEV_NETMAP) || defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
-	+#include 
-	+#endif /* !DEV_NETMAP */
-    
- The place is typically ... in FreeBSD, and - ... on Linux. - - The header really contains the new functions that implement - the netmap API. Including them inline simplifies the build, - as it does not require inserting additional dependencies into the - build system. - - On FreeBSD, DEV_NETMAP is sufficient to detect whether netmap extensions - should be compiled in, whereas CONFIG_NETMAP and CONFIG_NETMAP_MODULE - are the Linux equivalents. - - If a driver is made of multiple source files, you will need to include - the additional header in all the (few) patched files, preferably using - a macro such as NETMAP_FOO_MAIN to indicate the file where the - new functions should be compiled in. - -+ near the end of the attach routine, once the ifnet/net_device structure - has been filled and initialized, add -
-	+#ifdef DEV_NETMAP
-	+	foo_netmap_attach(adapter);
-	+#endif /* DEV_NETMAP */
-    
- The argument is either the ifnet or the private device descriptor. - This is in foo_attach() on FreeBSD, and somewhere in the path of - XXX foo_open() in Linux - -+ near the code called on device removal, add -
-	+#ifdef DEV_NETMAP
-	+	netmap_detach(ifp);
-	+#endif /* DEV_NETMAP */
-    
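For orientation, the foo_netmap_attach() invoked from the attach hook above is one of the new functions described later in this file. A rough sketch, modeled on the virtio_netmap_attach() routine included elsewhere in this patch set, looks like the following; SOFTC_T, adapter->netdev, adapter->num_tx_desc/num_rx_desc and adapter->num_queues are placeholders for the driver's own private descriptor, so treat this as an illustration of the pattern rather than a drop-in implementation.

	static void
	foo_netmap_attach(struct SOFTC_T *adapter)
	{
		struct netmap_adapter na;

		bzero(&na, sizeof(na));
		na.ifp = adapter->netdev;		/* the ifnet on FreeBSD */
		na.num_tx_desc = adapter->num_tx_desc;	/* ring geometry from the driver */
		na.num_rx_desc = adapter->num_rx_desc;
		na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
		na.nm_register = foo_netmap_reg;	/* the handlers described below */
		na.nm_txsync = foo_netmap_txsync;
		na.nm_rxsync = foo_netmap_rxsync;
		netmap_attach(&na);
	}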
- -+ after the tx/rx rings have been initialized, add a patch like this: -
-	+#ifdef DEV_NETMAP
-	+	foo_netmap_config(priv);
-	+#endif /* DEV_NETMAP */
-    
- The argument is typically the private device descriptor, or even - the struct ifnet/net_device. - -+ in the interrupt dispatch routines, something like -
-	+#ifdef DEV_NETMAP
-	+       int dummy;
-	+       if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, &dummy))
-	+               return true;
-	+#endif /* DEV_NETMAP */
-	...
-	+#ifdef DEV_NETMAP
-	+       if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
-	+               return true; /* seems to be ignored */
-	+#endif /* DEV_NETMAP */
-     
- to skip the normal processing and instead wake up the process in - charge of doing I/O - -New functions ----------------- -The new functions serve to register the netmap-enabled device driver, -support the enable/disable of netmap mode, attach netmap buffers to the -NIC rings, and finally implement the handlers (*_txsync(), *_rxsync()) -called by the system calls. - -* foo_netmap_attach() - This is a relatively mechanical function. The purpose is to fetch from - the device descriptor information on the number of rings and buffers, - the way locks are used, and invoke netmap_attach(). - -* foo_netmap_config() - This function is in charge of (over)writing the NIC rings with - pointers to the netmap buffers. Although this is device dependent, - we can often ignore the locking issue and expect that the locking is - already taken care of by the caller. - - foo_netmap_config() only needs to run if the card is in netmap mode. - A quick way to check is to call netmap_ring_init() on one of the rings, - if the function returns NULL we can immediately exit. - Otherwise, we should run a couple of nested loops (on the rings, - and then on the buffers) to fill the NIC descriptors with the - addresses of the (preallocated) netmap buffers. - - For the TX rings this can even be a no-op because these rings are - typically uninitialized, and the pointers can be overridden in the - txsync() routine. - - For the receive ring, the operation is more critical because the - buffers should be available by the time the NIC is enabled. - - Note that the device driver typically maintains head and tail pointers - to indicate which buffers are used. It might be convenient to retain - these indexes because may of the support routines, watchdogs etc. - depends on their values. - - We should note that, especially on the receive ring, there might be - an offset between the indexes used in the netmap ring and those used - in the NIC ring (which might even be non-contiguous). - -* foo_netmap_reg() - support entering/exiting of netmap mode. Typically, lock, stop the device, - set/clear the netmap flag, and restart the device. - An unfortunate side effect of stopping and restarting the device is that - in many drivers the link is reinitialized, causing long delays for the - speed negotiations and spanning tree setup. - - -* foo_netmap_txsync() - -* foo_netmap_rxsync() diff --git a/netmap/README b/netmap/README deleted file mode 100644 index f41e752..0000000 --- a/netmap/README +++ /dev/null @@ -1,241 +0,0 @@ - Netmap - a framework for fast packet I/O - VALE - a Virtual Local Ethernet using the netmap API -======================================================================== - -NETMAP is a framework for very fast packet I/O from userspace. -VALE is an equally fast in-kernel software switch using the netmap API. -Both are implemented as a single kernel module for FreeBSD and Linux, -and can deal with line rate on real or emulated 10 Gbit ports. -See details at - - http://info.iet.unipi.it/~luigi/netmap/ - -In this directory you can find source code (BSD-Copyright) for FreeBSD -and Linux. Note that recent FreeBSD distributions already include both -NETMAP and VALE. - -For more details please look at the manpage (netmap.4) and -netmap home page above. - - -What is this good for ---------------------- -Netmap is mostly useful for userspace applications that must deal with raw -packets: traffic generators, sinks, monitors, loggers, software switches -and routers, generic middleboxes, interconnection of virtual machines. 
- -In this distribution you will find some example userspace code to build -a generator, a sink, and a simple bridge. The kernel module implements a -learning ethernet bridge. We also include patches for some applications -(noticeably libpcap) so you can run any libpcap client on top of netmap -hopefully at a higher speed. - -Netmap alone DOES NOT accelerate your TCP. For that you need to implement -your own tcp/ip stack probably using some of the techniques indicated -below to reduce the processing costs. - -Architecture ------------- -netmap uses a number of techniques to establish a fast and efficient path -between applications and the network. In order of importance: - - 1. I/O batching - 2. efficient device drivers - 3. pre-allocated tx/rx buffers - 4. memory mapped buffers - -Despite the name, memory mapping is NOT the key feature for netmap's -speed; systems that do not apply all these techniques do not achieve -the same speed _and_ efficiency. - -Netmap clients use a select()-able file descriptor to synchronize -with the network card/software switch, and exchange multiple packets -per system call through device-independent memory mapped buffers and -descriptors. Device drivers are completely in the kernel, and the system -does not rely on IOMMU or other special mechanisms. - - - -Installation instructions -------------------------- -A kernel module (netmap.ko or netmap_lin.ko) implements the core -NETMAP routines and the VALE switch. -Netmap-aware device drivers are needed to use netmap on ethernet ports. -To date, we have support for Intel ixgbe (10G), e1000/e1000e/igb (1G), -Realtek 8169 (1G) and Nvidia (1G). - -If you do not have a supported device, you can still try out netmap -(with reduced performance) because the main kernel module emulates -the netmap API on top of standard device drivers. - - FreeBSD instructions: - --------------------- - Since recent FreeBSD distributions already include netmap, you only - need build the new kernel or modules as below: - - + add 'device netmap' to your kernel config file and rebuild a kernel. - This will include the netmap module and netmap support in the device - drivers. Alternatively, you can build standalone modules - (netmap, ixgbe, em, lem, re, igb) - + sample applications are in the examples/ directory in this archive, - or in src/tools/tools/netmap/ in FreeBSD distributions - - Linux instructions: - ------------------- - On Linux, netmap is an out-of-tree module, so you need to compile it - from these sources. The Makefile in the LINUX/ directory will also - let you patch device driver sources and build some netmap-enabled - device drivers. - + make sure you have kernel sources matching your installed kernel - (headers only suffice, if you want NETMAP/VALE but no drivers) - - + build kernel modules and sample applications: - If kernel sources are in /foo//linux-A.B.C/ , then you should do - - cd netmap/LINUX - # build kernel modules - make NODRIVERS=1 KSRC=/foo/linux-A.B.C/ # only netmap - make KSRC=/a/b/c/linux-A.B.C/ # netmap+device drivers - # build sample applications - make KSRC=/a/b/c/linux-A.B.C/ apps # builds sample applications - - You can omit KSRC if your kernel sources are in a standard place. 
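Once netmap_lin.ko is loaded, you can sanity-check the installation before running the sample applications. The fragment below is not one of the bundled tools, just a minimal sketch built on the nmreq/NIOCGINFO interface documented in netmap(4): with an empty interface name it should simply report the size of the shared memory region. Build it like the examples, e.g. cc -I../sys check.c -o check (check.c is an arbitrary name).

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <net/netmap.h>

	int main(void)
	{
		struct nmreq req;
		int fd = open("/dev/netmap", O_RDWR);

		if (fd < 0) {
			perror("/dev/netmap");	/* module not loaded, or bad permissions */
			return 1;
		}
		memset(&req, 0, sizeof(req));
		req.nr_version = NETMAP_API;	/* catch header/module mismatches */
		if (ioctl(fd, NIOCGINFO, &req) < 0) {
			perror("NIOCGINFO");
			close(fd);
			return 1;
		}
		printf("netmap ok, %u bytes of shared memory\n", req.nr_memsize);
		close(fd);
		return 0;
	}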
- - -Applications ------------- -The directory examples/ contains some programs that use the netmap API - - pkt-gen.c a packet generator/receiver working at line rate at 10Gbit/s - vale-cfg.c utility to configure ports of a VALE switch - bridge.c a utility that bridges two interfaces or one interface - with the host stack - -For libpcap and other applications look at the extra/ directory. - -Testing -------- -pkt-gen is a generic test program which can act as a sender or receiver. -It has a large number of options, but the simplest form is: - - pkt-gen -i ix0 -f rx # receive and print stats - pkt-gen -i ix0 -f tx -l 60 # send a stream of 60-byte packets - -(replace ix0 with the name of the interface or VALE port). -This should be able to work at line rate (up to 14.88 Mpps on 10 -Gbit/interfaces, even higher on VALE) but note the following - -OPERATING SPEED ---------------- -Netmap is able to send packets at very high rates, and for simple -packet transmission and reception, speed generally not limited by -the CPU but by other factors (link speed, bus or NIC hw limitations). - -For a physical link, the maximum numer of packets per second can -be computed with the formula: - - pps = line_rate / (672 + 8 * pkt_size) - -where "line_rate" is the nominal link rate (e.g 10 Gbit/s) and -pkt_size is the actual packet size including MAC headers and CRC. -The following table summarizes some results - - LINE RATE - pkt_size \ 100M 1G 10G 40G - - 64 .1488 1.488 14.88 59.52 - 128 .0589 0.589 5.89 23.58 - 256 .0367 0.367 3.67 14.70 - 512 .0209 0.209 2.09 8.38 - 1024 .0113 0.113 1.13 4.51 - 1518 .0078 0.078 0.78 3.12 - -On VALE ports, there is no physical link and the throughput is -limited by CPU or memory depending on the packet size. - -COMMON PROBLEMS ---------------- -Before reporting slow send or receive speed on a physical interface, -check ALL of the following: - -CANNOT SET THE DEVICE IN NETMAP MODE: - + make sure that the netmap module and drivers are correctly - loaded and can allocate all the memory they need (check into - /var/log/messages or equivalent) - + check permissions on /dev/netmap - + make sure the interface is up before invoking pkt-gen - -SENDER DOES NOT TRANSMIT - + some switches/interfaces take a long time to (re)negotiate - the link after starting pkt-gen; in case, use the -w N option - to increase the initial delay to N seconds; - - This may cause inability to transmit, or lost packets for - the first few seconds of transmission - -RECEIVER DOES NOT RECEIVE - + make sure traffic uses a broadcast MAC addresses, or the UNICAST - address of the receiving interface, or the receiving interface is in - promiscuous mode (this must be done with ifconfig; pkt-gen does not - change the operating mode) - -LOWER SPEED THAN LINE RATE - + check that your CPUs are running at the maximum clock rate - and are not throttled down by the governor/powerd. - - + make sure that the sender/receiver interfaces and switch have - flow control (FC) disabled (either via sysctl or ethtool). - - If FC is enabled and the receiving end is unable to cope - with the traffic, the driver will try to slow down transmission, - sometimes to very low rates. - - + a lot of hardware is not able to sustain line rate. For instance, - ixgbe has problems with receiving frames that are not multiple - of 64 bytes (with/without CRC depending on the driver); also on - transmissions, ixgbe tops at about 12.5 Mpps unless the driver - prefetches tx descriptors. igb does line rate in all configurations. 
- e1000/e1000e vary between 1.15 and 1.32 Mpps. re/r8169 is - extremely slow in sending (max 4-500 Kpps) - - -Credits -------- -NETMAP and VALE are projects of the Universita` di Pisa, -partially supported by various entities including: -Intel Research Berkeley, EU FP7 projects CHANGE and OPENLAB, -Netapp/Silicon Valley Community Foundation, ICSI - -Author: Luigi Rizzo -Contributors: - Giuseppe Lettieri - Michio Honda - Marta Carbone - Gaetano Catalli - Matteo Landi - Vincenzo Maffione - -References ----------- -There are a few academic papers describing netmap, VALE and applications. -You can find the papers at http://info.iet.unipi.it/~luigi/research.html - -+ Luigi Rizzo, - netmap: a novel framework for fast packet I/O, - Usenix ATC'12, Boston, June 2012 - -+ Luigi Rizzo, - Revisiting network I/O APIs: the netmap framework, - Communications of the ACM 55 (3), 45-51, March 2012 - -+ Luigi Rizzo, Marta Carbone, Gaetano Catalli, - Transparent acceleration of software packet forwarding using netmap, - IEEE Infocom 2012, Orlando, March 2012 - -+ Luigi Rizzo, Giuseppe Lettieri, - VALE: a switched ethernet for virtual machines, - ACM Conext 2012, Nice, Dec. 2012 - -+ Luigi Rizzo, Giuseppe Lettieri, Vincenzo Maffione, - Speeding up packet I/O in virtual machines, - IEEE/ACM ANCS 2013, San Jose, Oct. 2013 diff --git a/netmap/README.images b/netmap/README.images deleted file mode 100644 index c4444ad..0000000 --- a/netmap/README.images +++ /dev/null @@ -1,416 +0,0 @@ - EXPERIMENTING WITH NETMAP, VALE AND FAST QEMU - --------------------------------------------- - -To ease experiments with Netmap, the VALE switch and our Qemu enhancements -we have prepared a couple of bootable images (linux and FreeBSD). -You can find them on the netmap page - - http://info.iet.unipi.it/~luigi/netmap/ - -where you can also look at more recent versions of this file. - -Below are step-by-step instructions on experiments you can run -with these images. The two main versions are - - picobsd.hdd -> FreeBSD HEAD (netmap + VALE) - tinycore.hdd -> Linux (qemu + netmap + VALE) - -Booting the image ------------------ -For all experiments you need to copy the image on a USB stick -and boot a PC with it. Alternatively, you can use the image -with VirtualBox, Qemu or other emulators, as an example - - qemu-system-x86_64 -hda IMAGE_FILE -m 1G -machine accel=kvm ... - -(remove 'accel=kvm' if your host does not support kvm). -The images do not install anything on the hard disk. - -Both systems have preloaded drivers for a number of network cards -(including the intel 10 Gbit ones) with netmap extensions. -The VALE switch is also available (it is part of the netmap module). -ssh, scp and a few other utilities are also included. - -FreeBSD image: - - + the OS boots directly in console mode, you can switch - between terminals with ALT-Fn. - The password for the 'root' account is 'setup' - - + if you are connected to a network, you can use - dhclient em0 # or other interface name - to obtain an IP address and external connectivity. - -Linux image: - - + in addition to the netmap/VALE modules, the KVM kernel module - is also preloaded. - - + the boot-loader gives you two main options (each with - a variant to delay boot in case you have slow devices): - - + "Boot TinyCore" - boots in an X11 environment as user 'tc'. - You can create a few terminals using the icon at the - bottom. You can use "sudo -s" to get root access. - In case no suitable video card is available/detected, - it falls back to command line mode. 
- - + "Boot Core (command line only)" - boots in console mode with virtual terminals. - You're automatically logged in as user 'tc'. - To log in the other terminals use the same username - (no password required). - - + The system should automatically recognize the existing ethernet - devices, and load the appropriate netmap-capable device drivers - when available. Interfaces are configured through DHCP when possible. - - -General test recommendations ----------------------------- -NOTE: The tests outlined in the following sections can generate very high -packet rates, and some hardware misconfiguration problems may prevent -you from achieving maximum speed. -Common problems are: - -+ slow link autonegotiation. - Our programs typically wait 2-4 seconds for - link negotiation to complete, but some NIC/switch combinations - are much slower. In this case you should increase the delay - (pkt-gen has the -w XX option for that) or possibly force - the link speed and duplex mode on both sides. - - Check the link speed to make sure there are no nogotiation - problems, and that you see the expected speed. - - ethtool IFNAME # on linux - ifconfig IFNAME # on FreeBSD - -+ ethernet flow control. - If the receiving port is slow (often the case in presence - of multicast/broadcast traffic, or also unicast if you are - sending to non-netmap receivers), it will generate ethernet - flow control frames that throttle down the sender. - - We recommend to disable BOTH RX and TX ethernet flow control - on BOTH sender and receiver. - On Linux this can be done with ethtool: - - ethtool -A IFNAME tx off rx off - - whereas on FreeBSD there are device-specific sysctl - - sysctl dev.ix.0.queue0.flow_control = 0 - -+ CPU power saving. - The CPU governor on linux, or equivalent in FreeBSD, tend to - throttle down the clock rate reducing performance. - Unlike other similar systems, netmap does not have busy-wait - loops, so the CPU load is generally low and this can trigger - the clock slowdown. - - Make sure that ALL CPUs run at maximum speed, possibly - disabling the dynamic frequency-scaling mechanisms. - - cpufreq-set -gperformance # on linux - - sysctl dev.cpu.0.freq=3401 # on FreeBSD. - -+ wrong MAC address - netmap does not put the NIC in promiscuous mode, so unless the - application does it, the NIC will only receive broadcast traffic or - unicast directed to its own MAC address. - - -STANDARD SOCKET TESTS ---------------------- -For most socket-based experiments you can use the "netperf" tool installed -on the system (version 2.6.0). Be careful to use a matching version for -the other netperf endpoint (e.g. netserver) when running tests between -different machines. - -Interesting experiments are: - - netperf -H x.y.z.w -tTCP_STREAM # test TCP throughput - netperf -H x.y.z.w -tTCP_RR # test latency - netperf -H x.y.z.w -tUDP_STREAM -- -m8 # test UDP throughput with short packets - -where x.y.z.w is the host running "netserver". - - -RAW SOCKET AND TAP TESTS ------------------------- -For experiments with raw sockets and tap devices you can use the l2 -utilities (l2open, l2send, l2recv) installed on the system. -With these utilities you can send/receive custom network packets -to/from raw sockets or tap file descriptors. 
- -The receiver can be run with one of the following commands - - l2open -r IFNAME l2recv # receive from a raw socket attached to IFNAME - l2open -t IFNAME l2recv # receive from a file descriptor opened on the tap IFNAME - -The receiver process will wait indefinitely for the first packet -and then keep receiving as long as packets keep coming. When the -flow stops (after a 2 seconds timeout) the process terminates and -prints the received packet rate and packet count. - -To run the sender in an easy way, you can use the script l2-send.sh -in the home directory. This script defines several shell variables -that can be manually changed to customize the test (see -the comments in the script itself). - -As an example, you can test configurations with Virtual -Machines attached to host tap devices bridged together. - - -Tests using the Linux in-kernel pktgen --------------------------------------- -To use the Linux in-kernel packet generator, you can use the -script "linux-pktgen.sh" in the home directory. -The pktgen creates a kernel thread for each hardware TX queue -of a given NIC. - -By manually changing the script shell variable definitions you -can change the test configuration (e.g. addresses in the generated -packet). Please change the "NCPU" variable to match the number -of CPUs on your machine. The script has an argument which -specifies the number of NIC queues (i.e. kernel threads) -to use minus one. - -For example: - - ./linux-pktgen.sh 2 # Uses 3 NIC queues - -When the script terminates, it prints the per-queue rates and -the total rate achieved. - - -NETMAP AND VALE EXPERIMENTS ---------------------------- - -For most experiments with netmap you can use the "pkt-gen" command -(do not confuse it with the Linux in-kernel pktgen), which has a large -number of options to send and receive traffic (also on TAP devices). - -pkt-gen normally generates UDP traffic for a specific IP address -and using the brodadcast MAC address - -Netmap testing with network interfaces --------------------------------------- - -Remember that you need a netmap-capable driver in order to use -netmap on a specific NIC. Currently supported drivers are e1000, -e1000e, ixgbe, igb. For updated information please visit -http://info.iet.unipi.it/~luigi/netmap/ - -Before running pkt-gen, make sure that the link is up. - -Run pkt-gen on an interface called "IFNAME": - - pkt-gen -i IFNAME -f tx # run a pkt-gen sender - pkt-gen -i IFNAME -f rx # run a pkt-gen receiver - -pkt-gen without arguments will show other options, e.g. - + -w sec modifies the wait time for link negotioation - + -l len modifies the packet size - + -d, -s set the IP destination/source addresses and ports - + -D, -S set the MAC destination/source addresses - -and more. - -Testing the VALE switch ------------------------- - -To use the VALE switch instead of physical ports you only need -to change the interface name in the pkt-gen command. -As an example, on a single machine, you can run senders and receivers -on multiple ports of a VALE switch as follows (run the commands into -separate terminals to see the output) - - pkt-gen -ivale0:01 -ftx # run a sender on the port 01 of the switch vale0 - pkt-gen -ivale0:02 -frx # receiver on the port 02 of same switch - pkt-gen -ivale0:03 -ftx # another sender on the port 03 - -The VALE switches and ports are created (and destroyed) on the fly. 
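If you want to attach your own program to a VALE port instead of pkt-gen, the nm_desc helpers from netmap_user.h (the same API used by the bridge.c and pkt-gen.c sources shipped in examples/) are sufficient. The sketch below is only an illustration: it opens the arbitrary port vale0:99, counts the packets received in roughly ten seconds and exits. Pair it with a sender on the same switch, e.g. pkt-gen -ivale0:01 -ftx.

	#define NETMAP_WITH_LIBS
	#include <net/netmap_user.h>
	#include <poll.h>
	#include <stdio.h>

	int main(void)
	{
		struct nm_desc *d = nm_open("vale0:99", NULL, 0, NULL);	/* port created on the fly */
		struct pollfd pfd;
		unsigned long count = 0;
		int rounds;

		if (d == NULL) {
			D("cannot open vale0:99");
			return 1;
		}
		pfd.fd = d->fd;
		pfd.events = POLLIN;
		for (rounds = 0; rounds < 10; rounds++) {	/* ~10 x 1s of traffic */
			unsigned int ri;

			poll(&pfd, 1, 1000);
			for (ri = d->first_rx_ring; ri <= d->last_rx_ring; ri++) {
				struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);

				while (!nm_ring_empty(ring)) {
					unsigned int i = ring->cur;
					/* data is at NETMAP_BUF(ring, ring->slot[i].buf_idx),
					 * length in ring->slot[i].len */
					count++;
					ring->head = ring->cur = nm_ring_next(ring, i);
				}
			}
		}
		D("received %lu packets on vale0:99", count);
		nm_close(d);
		return 0;
	}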
- - -Transparent connection of physical ports to the VALE switch ------------------------------------------------------------ - -It is also possible to use a network device as a port of a VALE -switch. You can do this with the following command: - - vale-ctl -h vale0:eth0 # attach interface "eth0" to the "vale0" switch - -To detach an interface from a bridge: - - vale-ctl -d vale0:eth0 # detach interface "eth0" from the "vale0" switch - -These operations can be issued at any moment. - - -Tests with our modified QEMU ----------------------------- - -The Linux image also contains our modified QEMU, with the VALE backend and -the "e1000-paravirt" frontend (a paravirtualized e1000 emulation). - -After you have booted the image on a physical machine (so you can exploit -KVM), you can boot the same image a second time (recursively) with QEMU. -Therefore, you can run all the tests above also from within the virtual -machine environment. - -To make VM testing easier, the home directory contains some -some useful scripts to set up and launch VMs on the physical machine. - -+ "prep-taps.sh" - creates and sets up two permanent tap interfaces ("tap01" and "tap02") - and a Linux in-kernel bridge. The tap interfaces are then bridged - together on the same bridge. The bridge interface ("br0"), is given - the address 10.0.0.200/24. - - This setup can be used to make two VMs communicate through the - host bridge, or to test the speed of a linux switch using - l2open - -+ "unprep-taps.sh" - undoes the above setup. - -+ "launch-qemu.sh" - can be used to run QEMU virtual machines. It takes four arguments: - - + The first argument can be "qemu" or "kvm", depending on - whether we want to use the standard QEMU binary translation - or the hardware virtualization acceleration. - - + The third argument can be "--tap", "--netuser" or "--vale", - and tells QEMU what network backend to use: a tap device, - the QEMU user networking (slirp), or a VALE switch port. - - + When the third argument is "--tap" or "--vale", the fourth - argument specifies an index (e.g. "01", "02", etc..) which - tells QEMU what tap device or VALE port to use as backend. - - You can manually modify the script to set the shell variables that - select the type of emulated device (e.g. e1000, virtio-net-pci, ...) - and related options (ioeventfd, virtio vhost, e1000 mitigation, ....). - - The default setup has an "e1000" device with interrupt mitigation - disabled. - -You can try the paravirtualized e1000 device ("e1000-paravirt") -or the "virtio-net" device to get better performance. However, bear -in mind that these paravirtualized devices don't have netmap support -(whereas the standard e1000 does have netmap support). - -Examples: - - # Run a kvm VM attached to the port 01 of a VALE switch - ./launch-qemu.sh kvm --vale 01 - - # Run a kvm VM attached to the port 02 of the same VALE switch - ./launch-qemu.sh kvm --vale 02 - - # Run a kvm VM attached to the tap called "tap01" - ./launch-qemu.sh kvm --tap 01 - - # Run a kvm VM attached to the tap called "tap02" - ./launch-qemu.sh kvm --tap 02 - - -Guest-to-guest tests --------------------- - -If you run two VMs attached to the same switch (which can be a Linux -bridge or a VALE switch), you can run guest-to-guest experiments. - -All the tests reported in the previous sections are possible (normal -sockets, raw sockets, pkt-gen, ...), indipendently of the backend used. - -In the following examples we assume that: - - + Each VM has an ethernet interface called "eth0". 
- - + The interface of the first VM is given the IP 10.0.0.1/24. - - + The interface of the second VM is given the IP 10.0.0.2/24. - - + The Linux bridge interface "br0" on the host is given the - IP 10.0.0.200/24. - -Examples: - - [1] ### Test UDP short packets over traditional sockets ### - # On the guest 10.0.0.2 run - netserver - # on the guest 10.0.0.1 run - netperf -H10.0.0.2 -tUDP_STREAM -- -m8 - - [2] ### Test UDP short packets with pkt-gen ### - # On the guest 10.0.0.2 run - pkt-gen -ieth0 -frx - # On the guest 10.0.0.1 run - pkt-gen -ieth0 -ftx - - [3] ### Test guest-to-guest latency ### - # On the guest 10.0.0.2 run - netserver - # On the guest 10.0.0.1 run - netperf -H10.0.0.2 -tTCP_RR - -Note that you can use pkt-gen into a VM only if the emulated ethernet -device is supported by netmap. The default emulated device is -"e1000", which has netmap support. If you try to run pkt-gen on -an unsupported device, pkt-gen will not work, reporting that it is -unable to register the interface. - - -Guest-to-host tests (follows from the previous section) -------------------------------------------------------- - -If you run only a VM on your host machine, you can measure the -network performance between the VM and the host machine. In this -case the experiment setup depends on the backend you are using. - -With the tap backend, you can use the bridge interface "br0" as a -communication endpoint. You can run normal/raw sockets experiments, -but you cannot use pkt-gen on the "br0" interface, since the Linux -bridge interface is not supported by netmap. - -Examples with the tap backend: - - [1] ### Test TCP throughput over traditional sockets ### - # On the host run - netserver - # on the guest 10.0.0.1 run - netperf -H10.0.0.200 -tTCP_STREAM - - [2] ### Test UDP short packets with pkt-gen and l2 ### - # On the host run - l2open -r br0 l2recv - # On the guest 10.0.0.1 run (xx:yy:zz:ww:uu:vv is the - # "br0" hardware address) - pkt-gen -ieth0 -ftx -d10.0.0.200:7777 -Dxx:yy:zz:ww:uu:vv - - -With the VALE backend you can perform only UDP tests, since we don't have -a netmap application which implements a TCP endpoint: pkt-gen generates -UDP packets. -As a communication endpoint on the host, you can use a virtual VALE port -opened on the fly by a pkt-gen instance. - -Examples with the VALE backend: - - [1] ### Test UDP short packets ### - # On the host run - pkt-gen -ivale0:99 -frx - # On the guest 10.0.0.1 run - pkt-gen -ieth0 -ftx - - [2] ### Test UDP big packets (receiver on the guest) ### - # On the guest 10.0.0.1 run - pkt-gen -ieth0 -frx - # On the host run pkt-gen -ivale0:99 -ftx -l1460 - diff --git a/netmap/examples/GNUmakefile b/netmap/examples/GNUmakefile deleted file mode 100644 index 439846f..0000000 --- a/netmap/examples/GNUmakefile +++ /dev/null @@ -1,43 +0,0 @@ -# For multiple programs using a single source file each, -# we can just define 'progs' and create custom targets. 
-PROGS = pkt-gen bridge vale-ctl -#PROGS += pingd -PROGS += testlock test_select testmmap vale-ctl -LIBNETMAP = - -CLEANFILES = $(PROGS) *.o -NO_MAN= -CFLAGS = -O2 -pipe -CFLAGS += -Werror -Wall -Wunused-function -CFLAGS += -I ../sys # -I/home/luigi/FreeBSD/head/sys -I../sys -CFLAGS += -Wextra -ifdef WITH_PCAP -# do not use pcap by default, as it is not always available on linux -LDLIBS += -lpcap -else -CFLAGS += -DNO_PCAP -endif - -LDLIBS += -lpthread -ifeq ($(shell uname),Linux) - LDLIBS += -lrt # on linux -endif -#SRCS = pkt-gen.c - -all: $(PROGS) - -kern_test: testmod/kern_test.c - -pkt-gen: pkt-gen.o - -bridge: bridge.o - -vale-ctl: vale-ctl.o - -%-pic.o: %.c - $(CC) $(CFLAGS) -fpic -c $^ -o $@ - -clean: - -@rm -rf $(CLEANFILES) - -testlock: testlock.c diff --git a/netmap/examples/Makefile b/netmap/examples/Makefile deleted file mode 100644 index 8614074..0000000 --- a/netmap/examples/Makefile +++ /dev/null @@ -1,41 +0,0 @@ -# For multiple programs using a single source file each, -# we can just define 'progs' and create custom targets. -PROGS = pkt-gen bridge vale-ctl -#PROGS += pingd -PROGS += testlock test_select testmmap -MORE_PROGS = kern_test - -CLEANFILES = $(PROGS) *.o -NO_MAN= -CFLAGS = -O2 -pipe -CFLAGS += -Werror -Wall -Wunused-function -CFLAGS += -I ../sys # -I/home/luigi/FreeBSD/head/sys -I../sys -CFLAGS += -Wextra -.ifdef WITH_PCAP -LDFLAGS += -lpcap -.else -CFLAGS += -DNO_PCAP -.endif - -LDFLAGS += -lpthread -LDFLAGS += -lrt # needed on linux, does not harm on BSD -#SRCS = pkt-gen.c - -all: $(PROGS) - -kern_test: testmod/kern_test.c - -pkt-gen: pkt-gen.o - $(CC) $(CFLAGS) -o pkt-gen pkt-gen.o $(LDFLAGS) - -bridge: bridge.o - $(CC) $(CFLAGS) -o bridge bridge.o - -vale-ctl: vale-ctl.o - $(CC) $(CFLAGS) -o vale-ctl vale-ctl.o - -clean: - -@rm -rf $(CLEANFILES) - -testlock: testlock.c - $(CC) $(CFLAGS) -o testlock testlock.c -lpthread $(LDFLAGS) diff --git a/netmap/examples/README b/netmap/examples/README deleted file mode 100644 index 8bde20a..0000000 --- a/netmap/examples/README +++ /dev/null @@ -1,11 +0,0 @@ -$FreeBSD: head/tools/tools/netmap/README 227614 2011-11-17 12:17:39Z luigi $ - -This directory contains examples that use netmap - - pkt-gen a packet sink/source using the netmap API - - bridge a two-port jumper wire, also using the native API - - testpcap a jumper wire using libnetmap (or libpcap) - - click* various click examples diff --git a/netmap/examples/bridge.c b/netmap/examples/bridge.c deleted file mode 100644 index 98b2a1f..0000000 --- a/netmap/examples/bridge.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * (C) 2011-2014 Luigi Rizzo, Matteo Landi - * - * BSD license - * - * A netmap client to bridge two network interfaces - * (or one interface and the host stack). - * - * $FreeBSD: head/tools/tools/netmap/bridge.c 228975 2011-12-30 00:04:11Z uqs $ - */ - -#include -#define NETMAP_WITH_LIBS -#include -#include - -int verbose = 0; - -static int do_abort = 0; -static int zerocopy = 1; /* enable zerocopy if possible */ - -static void -sigint_h(int sig) -{ - (void)sig; /* UNUSED */ - do_abort = 1; - signal(SIGINT, SIG_DFL); -} - - -/* - * how many packets on this set of queues ? 
- */ -int -pkt_queued(struct nm_desc *d, int tx) -{ - u_int i, tot = 0; - - if (tx) { - for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) { - tot += nm_ring_space(NETMAP_TXRING(d->nifp, i)); - } - } else { - for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) { - tot += nm_ring_space(NETMAP_RXRING(d->nifp, i)); - } - } - return tot; -} - -/* - * move up to 'limit' pkts from rxring to txring swapping buffers. - */ -static int -process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, - u_int limit, const char *msg) -{ - u_int j, k, m = 0; - - /* print a warning if any of the ring flags is set (e.g. NM_REINIT) */ - if (rxring->flags || txring->flags) - D("%s rxflags %x txflags %x", - msg, rxring->flags, txring->flags); - j = rxring->cur; /* RX */ - k = txring->cur; /* TX */ - m = nm_ring_space(rxring); - if (m < limit) - limit = m; - m = nm_ring_space(txring); - if (m < limit) - limit = m; - m = limit; - while (limit-- > 0) { - struct netmap_slot *rs = &rxring->slot[j]; - struct netmap_slot *ts = &txring->slot[k]; - - /* swap packets */ - if (ts->buf_idx < 2 || rs->buf_idx < 2) { - D("wrong index rx[%d] = %d -> tx[%d] = %d", - j, rs->buf_idx, k, ts->buf_idx); - sleep(2); - } - /* copy the packet length. */ - if (rs->len > 2048) { - D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k); - rs->len = 0; - } else if (verbose > 1) { - D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k); - } - ts->len = rs->len; - if (zerocopy) { - uint32_t pkt = ts->buf_idx; - ts->buf_idx = rs->buf_idx; - rs->buf_idx = pkt; - /* report the buffer change. */ - ts->flags |= NS_BUF_CHANGED; - rs->flags |= NS_BUF_CHANGED; - } else { - char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx); - char *txbuf = NETMAP_BUF(txring, ts->buf_idx); - nm_pkt_copy(rxbuf, txbuf, ts->len); - } - j = nm_ring_next(rxring, j); - k = nm_ring_next(txring, k); - } - rxring->head = rxring->cur = j; - txring->head = txring->cur = k; - if (verbose && m > 0) - D("%s sent %d packets to %p", msg, m, txring); - - return (m); -} - -/* move packts from src to destination */ -static int -move(struct nm_desc *src, struct nm_desc *dst, u_int limit) -{ - struct netmap_ring *txring, *rxring; - u_int m = 0, si = src->first_rx_ring, di = dst->first_tx_ring; - const char *msg = (src->req.nr_ringid & NETMAP_SW_RING) ? - "host->net" : "net->host"; - - while (si <= src->last_rx_ring && di <= dst->last_tx_ring) { - rxring = NETMAP_RXRING(src->nifp, si); - txring = NETMAP_TXRING(dst->nifp, di); - ND("txring %p rxring %p", txring, rxring); - if (nm_ring_empty(rxring)) { - si++; - continue; - } - if (nm_ring_empty(txring)) { - di++; - continue; - } - m += process_rings(rxring, txring, limit, msg); - } - - return (m); -} - - -static void -usage(void) -{ - fprintf(stderr, - "usage: bridge [-v] [-i ifa] [-i ifb] [-b burst] [-w wait_time] [iface]\n"); - exit(1); -} - -/* - * bridge [-v] if1 [if2] - * - * If only one name, or the two interfaces are the same, - * bridges userland and the adapter. Otherwise bridge - * two intefaces. 
- */ -int -main(int argc, char **argv) -{ - struct pollfd pollfd[2]; - int ch; - u_int burst = 1024, wait_link = 4; - struct nm_desc *pa = NULL, *pb = NULL; - char *ifa = NULL, *ifb = NULL; - char ifabuf[64] = { 0 }; - - fprintf(stderr, "%s built %s %s\n", - argv[0], __DATE__, __TIME__); - - while ( (ch = getopt(argc, argv, "b:ci:vw:")) != -1) { - switch (ch) { - default: - D("bad option %c %s", ch, optarg); - usage(); - break; - case 'b': /* burst */ - burst = atoi(optarg); - break; - case 'i': /* interface */ - if (ifa == NULL) - ifa = optarg; - else if (ifb == NULL) - ifb = optarg; - else - D("%s ignored, already have 2 interfaces", - optarg); - break; - case 'c': - zerocopy = 0; /* do not zerocopy */ - break; - case 'v': - verbose++; - break; - case 'w': - wait_link = atoi(optarg); - break; - } - - } - - argc -= optind; - argv += optind; - - if (argc > 1) - ifa = argv[1]; - if (argc > 2) - ifb = argv[2]; - if (argc > 3) - burst = atoi(argv[3]); - if (!ifb) - ifb = ifa; - if (!ifa) { - D("missing interface"); - usage(); - } - if (burst < 1 || burst > 8192) { - D("invalid burst %d, set to 1024", burst); - burst = 1024; - } - if (wait_link > 100) { - D("invalid wait_link %d, set to 4", wait_link); - wait_link = 4; - } - if (!strcmp(ifa, ifb)) { - D("same interface, endpoint 0 goes to host"); - snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa); - ifa = ifabuf; - } else { - /* two different interfaces. Take all rings on if1 */ - } - pa = nm_open(ifa, NULL, 0, NULL); - if (pa == NULL) { - D("cannot open %s", ifa); - return (1); - } - // XXX use a single mmap ? - pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa); - if (pb == NULL) { - D("cannot open %s", ifb); - nm_close(pa); - return (1); - } - zerocopy = zerocopy && (pa->mem == pb->mem); - D("------- zerocopy %ssupported", zerocopy ? "" : "NOT "); - - /* setup poll(2) variables. */ - memset(pollfd, 0, sizeof(pollfd)); - pollfd[0].fd = pa->fd; - pollfd[1].fd = pb->fd; - - D("Wait %d secs for link to come up...", wait_link); - sleep(wait_link); - D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.", - pa->req.nr_name, pa->first_rx_ring, pa->req.nr_rx_rings, - pb->req.nr_name, pb->first_rx_ring, pb->req.nr_rx_rings); - - /* main loop */ - signal(SIGINT, sigint_h); - while (!do_abort) { - int n0, n1, ret; - pollfd[0].events = pollfd[1].events = 0; - pollfd[0].revents = pollfd[1].revents = 0; - n0 = pkt_queued(pa, 0); - n1 = pkt_queued(pb, 0); - if (n0) - pollfd[1].events |= POLLOUT; - else - pollfd[0].events |= POLLIN; - if (n1) - pollfd[0].events |= POLLOUT; - else - pollfd[1].events |= POLLIN; - ret = poll(pollfd, 2, 2500); - if (ret <= 0 || verbose) - D("poll %s [0] ev %x %x rx %d@%d tx %d," - " [1] ev %x %x rx %d@%d tx %d", - ret <= 0 ? 
"timeout" : "ok", - pollfd[0].events, - pollfd[0].revents, - pkt_queued(pa, 0), - NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->cur, - pkt_queued(pa, 1), - pollfd[1].events, - pollfd[1].revents, - pkt_queued(pb, 0), - NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->cur, - pkt_queued(pb, 1) - ); - if (ret < 0) - continue; - if (pollfd[0].revents & POLLERR) { - struct netmap_ring *rx = NETMAP_RXRING(pa->nifp, pa->cur_rx_ring); - D("error on fd0, rx [%d,%d,%d)", - rx->head, rx->cur, rx->tail); - } - if (pollfd[1].revents & POLLERR) { - struct netmap_ring *rx = NETMAP_RXRING(pb->nifp, pb->cur_rx_ring); - D("error on fd1, rx [%d,%d,%d)", - rx->head, rx->cur, rx->tail); - } - if (pollfd[0].revents & POLLOUT) { - move(pb, pa, burst); - // XXX we don't need the ioctl */ - // ioctl(me[0].fd, NIOCTXSYNC, NULL); - } - if (pollfd[1].revents & POLLOUT) { - move(pa, pb, burst); - // XXX we don't need the ioctl */ - // ioctl(me[1].fd, NIOCTXSYNC, NULL); - } - } - D("exiting"); - nm_close(pb); - nm_close(pa); - - return (0); -} diff --git a/netmap/examples/click-test.cfg b/netmap/examples/click-test.cfg deleted file mode 100644 index d95ab4a..0000000 --- a/netmap/examples/click-test.cfg +++ /dev/null @@ -1,19 +0,0 @@ -// -// $FreeBSD: head/tools/tools/netmap/click-test.cfg 227614 2011-11-17 12:17:39Z luigi $ -// -// A sample test configuration for click -// -// -// create a switch - -myswitch :: EtherSwitch; - -// two input devices - -c0 :: FromDevice(ix0, PROMISC true); -c1 :: FromDevice(ix1, PROMISC true); - -// and now pass packets around - -c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0); -c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1); diff --git a/netmap/examples/pkt-gen.c b/netmap/examples/pkt-gen.c deleted file mode 100644 index 3eefc4c..0000000 --- a/netmap/examples/pkt-gen.c +++ /dev/null @@ -1,1900 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/tools/tools/netmap/pkt-gen.c 231198 2012-02-08 11:43:29Z luigi $ - * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ - * - * Example program to show how to build a multithreaded packet - * source/sink using the netmap device. 
- * - * In this example we create a programmable number of threads - * to take care of all the queues of the interface used to - * send or receive traffic. - * - */ - -#define _GNU_SOURCE /* for CPU_SET() */ -#include -#define NETMAP_WITH_LIBS -#include - - -#include // isprint() -#include // sysconf() -#include -#include /* ntohs */ -#include /* sysctl */ -#include /* getifaddrs */ -#include -#include -#include -#include - -#include - -#ifndef NO_PCAP -#include -#endif - -#ifdef linux - -#define cpuset_t cpu_set_t - -#define ifr_flagshigh ifr_flags /* only the low 16 bits here */ -#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ -#include -#include - -#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME -#include /* ether_aton */ -#include /* sockaddr_ll */ -#endif /* linux */ - -#ifdef __FreeBSD__ -#include /* le64toh */ -#include - -#include /* pthread w/ affinity */ -#include /* cpu_set */ -#include /* LLADDR */ -#endif /* __FreeBSD__ */ - -#ifdef __APPLE__ - -#define cpuset_t uint64_t // XXX -static inline void CPU_ZERO(cpuset_t *p) -{ - *p = 0; -} - -static inline void CPU_SET(uint32_t i, cpuset_t *p) -{ - *p |= 1<< (i & 0x3f); -} - -#define pthread_setaffinity_np(a, b, c) ((void)a, 0) - -#define ifr_flagshigh ifr_flags // XXX -#define IFF_PPROMISC IFF_PROMISC -#include /* LLADDR */ -#define clock_gettime(a,b) \ - do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) -#endif /* __APPLE__ */ - -const char *default_payload="netmap pkt-gen DIRECT payload\n" - "http://info.iet.unipi.it/~luigi/netmap/ "; - -const char *indirect_payload="netmap pkt-gen indirect payload\n" - "http://info.iet.unipi.it/~luigi/netmap/ "; - -int verbose = 0; - -#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */ - - -#define VIRT_HDR_1 10 /* length of a base vnet-hdr */ -#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ -#define VIRT_HDR_MAX VIRT_HDR_2 -struct virt_header { - uint8_t fields[VIRT_HDR_MAX]; -}; - -struct pkt { - struct virt_header vh; - struct ether_header eh; - struct ip ip; - struct udphdr udp; - uint8_t body[2048]; // XXX hardwired -} __attribute__((__packed__)); - -struct ip_range { - char *name; - uint32_t start, end; /* same as struct in_addr */ - uint16_t port0, port1; -}; - -struct mac_range { - char *name; - struct ether_addr start, end; -}; - -/* ifname can be netmap:foo-xxxx */ -#define MAX_IFNAMELEN 64 /* our buffer for ifname */ -/* - * global arguments for all threads - */ - -struct glob_arg { - struct ip_range src_ip; - struct ip_range dst_ip; - struct mac_range dst_mac; - struct mac_range src_mac; - int pkt_size; - int burst; - int forever; - int npackets; /* total packets to send */ - int frags; /* fragments per packet */ - int nthreads; - int cpus; - int options; /* testing */ -#define OPT_PREFETCH 1 -#define OPT_ACCESS 2 -#define OPT_COPY 4 -#define OPT_MEMCPY 8 -#define OPT_TS 16 /* add a timestamp */ -#define OPT_INDIRECT 32 /* use indirect buffers, tx only */ -#define OPT_DUMP 64 /* dump rx/tx traffic */ - int dev_type; -#ifndef NO_PCAP - pcap_t *p; -#endif - - int tx_rate; - struct timespec tx_period; - - int affinity; - int main_fd; - struct nm_desc *nmd; - uint64_t nmd_flags; - int report_interval; /* milliseconds between prints */ - void *(*td_body)(void *); - void *mmap_addr; - char ifname[MAX_IFNAMELEN]; - char *nmr_config; - int dummy_send; - int virt_header; /* send also the virt_header */ - int extra_bufs; /* goes in nr_arg3 */ -}; -enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; - - -/* - * Arguments for a new thread. 
The same structure is used by - * the source and the sink - */ -struct targ { - struct glob_arg *g; - int used; - int completed; - int cancel; - int fd; - struct nm_desc *nmd; - volatile uint64_t count; - struct timespec tic, toc; - int me; - pthread_t thread; - int affinity; - - struct pkt pkt; -}; - - -/* - * extract the extremes from a range of ipv4 addresses. - * addr_lo[-addr_hi][:port_lo[-port_hi]] - */ -static void -extract_ip_range(struct ip_range *r) -{ - char *ap, *pp; - struct in_addr a; - - if (verbose) - D("extract IP range from %s", r->name); - r->port0 = r->port1 = 0; - r->start = r->end = 0; - - /* the first - splits start/end of range */ - ap = index(r->name, '-'); /* do we have ports ? */ - if (ap) { - *ap++ = '\0'; - } - /* grab the initial values (mandatory) */ - pp = index(r->name, ':'); - if (pp) { - *pp++ = '\0'; - r->port0 = r->port1 = strtol(pp, NULL, 0); - }; - inet_aton(r->name, &a); - r->start = r->end = ntohl(a.s_addr); - if (ap) { - pp = index(ap, ':'); - if (pp) { - *pp++ = '\0'; - if (*pp) - r->port1 = strtol(pp, NULL, 0); - } - if (*ap) { - inet_aton(ap, &a); - r->end = ntohl(a.s_addr); - } - } - if (r->port0 > r->port1) { - uint16_t tmp = r->port0; - r->port0 = r->port1; - r->port1 = tmp; - } - if (r->start > r->end) { - uint32_t tmp = r->start; - r->start = r->end; - r->end = tmp; - } - { - struct in_addr a; - char buf1[16]; // one ip address - - a.s_addr = htonl(r->end); - strncpy(buf1, inet_ntoa(a), sizeof(buf1)); - a.s_addr = htonl(r->start); - if (1) - D("range is %s:%d to %s:%d", - inet_ntoa(a), r->port0, buf1, r->port1); - } -} - -static void -extract_mac_range(struct mac_range *r) -{ - if (verbose) - D("extract MAC range from %s", r->name); - bcopy(ether_aton(r->name), &r->start, 6); - bcopy(ether_aton(r->name), &r->end, 6); -#if 0 - bcopy(targ->src_mac, eh->ether_shost, 6); - p = index(targ->g->src_mac, '-'); - if (p) - targ->src_mac_range = atoi(p+1); - - bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); - bcopy(targ->dst_mac, eh->ether_dhost, 6); - p = index(targ->g->dst_mac, '-'); - if (p) - targ->dst_mac_range = atoi(p+1); -#endif - if (verbose) - D("%s starts at %s", r->name, ether_ntoa(&r->start)); -} - -static struct targ *targs; -static int global_nthreads; - -/* control-C handler */ -static void -sigint_h(int sig) -{ - int i; - - (void)sig; /* UNUSED */ - for (i = 0; i < global_nthreads; i++) { - targs[i].cancel = 1; - } - signal(SIGINT, SIG_DFL); -} - -/* sysctl wrapper to return the number of active CPUs */ -static int -system_ncpus(void) -{ - int ncpus; -#if defined (__FreeBSD__) - int mib[2] = { CTL_HW, HW_NCPU }; - size_t len = sizeof(mib); - sysctl(mib, 2, &ncpus, &len, NULL, 0); -#elif defined(linux) - ncpus = sysconf(_SC_NPROCESSORS_ONLN); -#else /* others */ - ncpus = 1; -#endif /* others */ - return (ncpus); -} - -#ifdef __linux__ -#define sockaddr_dl sockaddr_ll -#define sdl_family sll_family -#define AF_LINK AF_PACKET -#define LLADDR(s) s->sll_addr; -#include -#define TAP_CLONEDEV "/dev/net/tun" -#endif /* __linux__ */ - -#ifdef __FreeBSD__ -#include -#define TAP_CLONEDEV "/dev/tap" -#endif /* __FreeBSD */ - -#ifdef __APPLE__ -// #warning TAP not supported on apple ? -#include -#define TAP_CLONEDEV "/dev/tap" -#endif /* __APPLE__ */ - - -/* - * parse the vale configuration in conf and put it in nmr. - * Return the flag set if necessary. - * The configuration may consist of 0 to 4 numbers separated - * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. - * Missing numbers or zeroes stand for default values. 
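 *
 * A minimal usage sketch (the numbers are illustrative, not defaults):
 *
 *	struct nmreq req;
 *	uint64_t flags = 0;
 *
 *	bzero(&req, sizeof(req));
 *	flags |= parse_nmr_config("2048,2048,4,4", &req);
 *
 * after which req asks for 2048 tx/rx slots and 4 tx plus 4 rx rings,
 * and flags carries NM_OPEN_RING_CFG so a later nm_open() honors them.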
- * As an additional convenience, if exactly one number - * is specified, then this is assigned to both #tx-slots and #rx-slots. - * If there is no 4th number, then the 3rd is assigned to both #tx-rings - * and #rx-rings. - */ -int -parse_nmr_config(const char* conf, struct nmreq *nmr) -{ - char *w, *tok; - int i, v; - - nmr->nr_tx_rings = nmr->nr_rx_rings = 0; - nmr->nr_tx_slots = nmr->nr_rx_slots = 0; - if (conf == NULL || ! *conf) - return 0; - w = strdup(conf); - for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { - v = atoi(tok); - switch (i) { - case 0: - nmr->nr_tx_slots = nmr->nr_rx_slots = v; - break; - case 1: - nmr->nr_rx_slots = v; - break; - case 2: - nmr->nr_tx_rings = nmr->nr_rx_rings = v; - break; - case 3: - nmr->nr_rx_rings = v; - break; - default: - D("ignored config: %s", tok); - break; - } - } - D("txr %d txd %d rxr %d rxd %d", - nmr->nr_tx_rings, nmr->nr_tx_slots, - nmr->nr_rx_rings, nmr->nr_rx_slots); - free(w); - return (nmr->nr_tx_rings || nmr->nr_tx_slots || - nmr->nr_rx_rings || nmr->nr_rx_slots) ? - NM_OPEN_RING_CFG : 0; -} - - -/* - * locate the src mac address for our interface, put it - * into the user-supplied buffer. return 0 if ok, -1 on error. - */ -static int -source_hwaddr(const char *ifname, char *buf) -{ - struct ifaddrs *ifaphead, *ifap; - int l = sizeof(ifap->ifa_name); - - if (getifaddrs(&ifaphead) != 0) { - D("getifaddrs %s failed", ifname); - return (-1); - } - - for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { - struct sockaddr_dl *sdl = - (struct sockaddr_dl *)ifap->ifa_addr; - uint8_t *mac; - - if (!sdl || sdl->sdl_family != AF_LINK) - continue; - if (strncmp(ifap->ifa_name, ifname, l) != 0) - continue; - mac = (uint8_t *)LLADDR(sdl); - sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", - mac[0], mac[1], mac[2], - mac[3], mac[4], mac[5]); - if (verbose) - D("source hwaddr %s", buf); - break; - } - freeifaddrs(ifaphead); - return ifap ? 0 : 1; -} - - -/* set the thread affinity. */ -static int -setaffinity(pthread_t me, int i) -{ - cpuset_t cpumask; - - if (i == -1) - return 0; - - /* Set thread affinity affinity.*/ - CPU_ZERO(&cpumask); - CPU_SET(i, &cpumask); - - if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { - D("Unable to set affinity: %s", strerror(errno)); - return 1; - } - return 0; -} - -/* Compute the checksum of the given ip header. */ -static uint16_t -checksum(const void *data, uint16_t len, uint32_t sum) -{ - const uint8_t *addr = data; - uint32_t i; - - /* Checksum all the pairs of bytes first... */ - for (i = 0; i < (len & ~1U); i += 2) { - sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); - if (sum > 0xFFFF) - sum -= 0xFFFF; - } - /* - * If there's a single byte left over, checksum it, too. - * Network byte order is big-endian, so the remaining byte is - * the high byte. - */ - if (i < len) { - sum += addr[i] << 8; - if (sum > 0xFFFF) - sum -= 0xFFFF; - } - return sum; -} - -static u_int16_t -wrapsum(u_int32_t sum) -{ - sum = ~sum & 0xFFFF; - return (htons(sum)); -} - -/* Check the payload of the packet for errors (use it for debug). - * Look for consecutive ascii representations of the size of the packet. - */ -static void -dump_payload(char *p, int len, struct netmap_ring *ring, int cur) -{ - char buf[128]; - int i, j, i0; - - /* get the length in ASCII of the length of the packet. 
*/ - - printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", - ring, cur, ring->slot[cur].buf_idx, - ring->slot[cur].flags, len); - /* hexdump routine */ - for (i = 0; i < len; ) { - memset(buf, sizeof(buf), ' '); - sprintf(buf, "%5d: ", i); - i0 = i; - for (j=0; j < 16 && i < len; i++, j++) - sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); - i = i0; - for (j=0; j < 16 && i < len; i++, j++) - sprintf(buf+7+j + 48, "%c", - isprint(p[i]) ? p[i] : '.'); - printf("%s\n", buf); - } -} - -/* - * Fill a packet with some payload. - * We create a UDP packet so the payload starts at - * 14+20+8 = 42 bytes. - */ -#ifdef __linux__ -#define uh_sport source -#define uh_dport dest -#define uh_ulen len -#define uh_sum check -#endif /* linux */ - -/* - * increment the addressed in the packet, - * starting from the least significant field. - * DST_IP DST_PORT SRC_IP SRC_PORT - */ -static void -update_addresses(struct pkt *pkt, struct glob_arg *g) -{ - uint32_t a; - uint16_t p; - struct ip *ip = &pkt->ip; - struct udphdr *udp = &pkt->udp; - - do { - p = ntohs(udp->uh_sport); - if (p < g->src_ip.port1) { /* just inc, no wrap */ - udp->uh_sport = htons(p + 1); - break; - } - udp->uh_sport = htons(g->src_ip.port0); - - a = ntohl(ip->ip_src.s_addr); - if (a < g->src_ip.end) { /* just inc, no wrap */ - ip->ip_src.s_addr = htonl(a + 1); - break; - } - ip->ip_src.s_addr = htonl(g->src_ip.start); - - udp->uh_sport = htons(g->src_ip.port0); - p = ntohs(udp->uh_dport); - if (p < g->dst_ip.port1) { /* just inc, no wrap */ - udp->uh_dport = htons(p + 1); - break; - } - udp->uh_dport = htons(g->dst_ip.port0); - - a = ntohl(ip->ip_dst.s_addr); - if (a < g->dst_ip.end) { /* just inc, no wrap */ - ip->ip_dst.s_addr = htonl(a + 1); - break; - } - ip->ip_dst.s_addr = htonl(g->dst_ip.start); - } while (0); - // update checksum -} - -/* - * initialize one packet and prepare for the next one. - * The copy could be done better instead of repeating it each time. - */ -static void -initialize_packet(struct targ *targ) -{ - struct pkt *pkt = &targ->pkt; - struct ether_header *eh; - struct ip *ip; - struct udphdr *udp; - uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); - const char *payload = targ->g->options & OPT_INDIRECT ? 
- indirect_payload : default_payload; - int i, l0 = strlen(payload); - - /* create a nice NUL-terminated string */ - for (i = 0; i < paylen; i += l0) { - if (l0 > paylen - i) - l0 = paylen - i; // last round - bcopy(payload, pkt->body + i, l0); - } - pkt->body[i-1] = '\0'; - ip = &pkt->ip; - - /* prepare the headers */ - ip->ip_v = IPVERSION; - ip->ip_hl = 5; - ip->ip_id = 0; - ip->ip_tos = IPTOS_LOWDELAY; - ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); - ip->ip_id = 0; - ip->ip_off = htons(IP_DF); /* Don't fragment */ - ip->ip_ttl = IPDEFTTL; - ip->ip_p = IPPROTO_UDP; - ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); - ip->ip_src.s_addr = htonl(targ->g->src_ip.start); - ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); - - - udp = &pkt->udp; - udp->uh_sport = htons(targ->g->src_ip.port0); - udp->uh_dport = htons(targ->g->dst_ip.port0); - udp->uh_ulen = htons(paylen); - /* Magic: taken from sbin/dhclient/packet.c */ - udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), - checksum(pkt->body, - paylen - sizeof(*udp), - checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), - IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) - ) - ) - )); - - eh = &pkt->eh; - bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); - bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); - eh->ether_type = htons(ETHERTYPE_IP); - - bzero(&pkt->vh, sizeof(pkt->vh)); - // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); -} - - - -/* - * create and enqueue a batch of packets on a ring. - * On the last one set NS_REPORT to tell the driver to generate - * an interrupt when done. - */ -static int -send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, - int size, struct glob_arg *g, u_int count, int options, - u_int nfrags) -{ - u_int n, sent, cur = ring->cur; - u_int fcnt; - - n = nm_ring_space(ring); - if (n < count) - count = n; - if (count < nfrags) { - D("truncating packet, no room for frags %d %d", - count, nfrags); - } -#if 0 - if (options & (OPT_COPY | OPT_PREFETCH) ) { - for (sent = 0; sent < count; sent++) { - struct netmap_slot *slot = &ring->slot[cur]; - char *p = NETMAP_BUF(ring, slot->buf_idx); - - __builtin_prefetch(p); - cur = nm_ring_next(ring, cur); - } - cur = ring->cur; - } -#endif - for (fcnt = nfrags, sent = 0; sent < count; sent++) { - struct netmap_slot *slot = &ring->slot[cur]; - char *p = NETMAP_BUF(ring, slot->buf_idx); - - slot->flags = 0; - if (options & OPT_INDIRECT) { - slot->flags |= NS_INDIRECT; - slot->ptr = (uint64_t)frame; - } else if (options & OPT_COPY) { - nm_pkt_copy(frame, p, size); - if (fcnt == nfrags) - update_addresses(pkt, g); - } else if (options & OPT_MEMCPY) { - memcpy(p, frame, size); - if (fcnt == nfrags) - update_addresses(pkt, g); - } else if (options & OPT_PREFETCH) { - __builtin_prefetch(p); - } - if (options & OPT_DUMP) - dump_payload(p, size, ring, cur); - slot->len = size; - if (--fcnt > 0) - slot->flags |= NS_MOREFRAG; - else - fcnt = nfrags; - if (sent == count - 1) { - slot->flags &= ~NS_MOREFRAG; - slot->flags |= NS_REPORT; - } - cur = nm_ring_next(ring, cur); - } - ring->head = ring->cur = cur; - - return (sent); -} - -/* - * Send a packet, and wait for a response. - * The payload (after UDP header, ofs 42) has a 4-byte sequence - * followed by a struct timeval (or bintime?) - */ -#define PAY_OFS 42 /* where in the pkt... 
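 *
 * i.e. the stacked header sizes: 14 (ethernet) + 20 (IPv4, no options)
 * + 8 (UDP) = 42. Note that the code below stores a struct timespec,
 * not a timeval, right after the 4-byte sequence, at offset 46.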
*/ - -static void * -pinger_body(void *data) -{ - struct targ *targ = (struct targ *) data; - struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; - struct netmap_if *nifp = targ->nmd->nifp; - int i, rx = 0, n = targ->g->npackets; - void *frame; - int size; - uint32_t sent = 0; - struct timespec ts, now, last_print; - uint32_t count = 0, min = 1000000000, av = 0; - - frame = &targ->pkt; - frame += sizeof(targ->pkt.vh) - targ->g->virt_header; - size = targ->g->pkt_size + targ->g->virt_header; - - - if (targ->g->nthreads > 1) { - D("can only ping with 1 thread"); - return NULL; - } - - clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); - now = last_print; - while (n == 0 || (int)sent < n) { - struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); - struct netmap_slot *slot; - char *p; - for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ - slot = &ring->slot[ring->cur]; - slot->len = size; - p = NETMAP_BUF(ring, slot->buf_idx); - - if (nm_ring_empty(ring)) { - D("-- ouch, cannot send"); - } else { - nm_pkt_copy(frame, p, size); - clock_gettime(CLOCK_REALTIME_PRECISE, &ts); - bcopy(&sent, p+42, sizeof(sent)); - bcopy(&ts, p+46, sizeof(ts)); - sent++; - ring->head = ring->cur = nm_ring_next(ring, ring->cur); - } - } - /* should use a parameter to decide how often to send */ - if (poll(&pfd, 1, 3000) <= 0) { - D("poll error/timeout on queue %d: %s", targ->me, - strerror(errno)); - continue; - } - /* see what we got back */ - for (i = targ->nmd->first_tx_ring; - i <= targ->nmd->last_tx_ring; i++) { - ring = NETMAP_RXRING(nifp, i); - while (!nm_ring_empty(ring)) { - uint32_t seq; - slot = &ring->slot[ring->cur]; - p = NETMAP_BUF(ring, slot->buf_idx); - - clock_gettime(CLOCK_REALTIME_PRECISE, &now); - bcopy(p+42, &seq, sizeof(seq)); - bcopy(p+46, &ts, sizeof(ts)); - ts.tv_sec = now.tv_sec - ts.tv_sec; - ts.tv_nsec = now.tv_nsec - ts.tv_nsec; - if (ts.tv_nsec < 0) { - ts.tv_nsec += 1000000000; - ts.tv_sec--; - } - if (1) D("seq %d/%d delta %d.%09d", seq, sent, - (int)ts.tv_sec, (int)ts.tv_nsec); - if (ts.tv_nsec < (int)min) - min = ts.tv_nsec; - count ++; - av += ts.tv_nsec; - ring->head = ring->cur = nm_ring_next(ring, ring->cur); - rx++; - } - } - //D("tx %d rx %d", sent, rx); - //usleep(100000); - ts.tv_sec = now.tv_sec - last_print.tv_sec; - ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; - if (ts.tv_nsec < 0) { - ts.tv_nsec += 1000000000; - ts.tv_sec--; - } - if (ts.tv_sec >= 1) { - D("count %d min %d av %d", - count, min, av/count); - count = 0; - av = 0; - min = 100000000; - last_print = now; - } - } - return NULL; -} - - -/* - * reply to ping requests - */ -static void * -ponger_body(void *data) -{ - struct targ *targ = (struct targ *) data; - struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; - struct netmap_if *nifp = targ->nmd->nifp; - struct netmap_ring *txring, *rxring; - int i, rx = 0, sent = 0, n = targ->g->npackets; - - if (targ->g->nthreads > 1) { - D("can only reply ping with 1 thread"); - return NULL; - } - D("understood ponger %d but don't know how to do it", n); - while (n == 0 || sent < n) { - uint32_t txcur, txavail; -//#define BUSYWAIT -#ifdef BUSYWAIT - ioctl(pfd.fd, NIOCRXSYNC, NULL); -#else - if (poll(&pfd, 1, 1000) <= 0) { - D("poll error/timeout on queue %d: %s", targ->me, - strerror(errno)); - continue; - } -#endif - txring = NETMAP_TXRING(nifp, 0); - txcur = txring->cur; - txavail = nm_ring_space(txring); - /* see what we got back */ - for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { - rxring = NETMAP_RXRING(nifp, i); - while 
(!nm_ring_empty(rxring)) { - uint16_t *spkt, *dpkt; - uint32_t cur = rxring->cur; - struct netmap_slot *slot = &rxring->slot[cur]; - char *src, *dst; - src = NETMAP_BUF(rxring, slot->buf_idx); - //D("got pkt %p of size %d", src, slot->len); - rxring->head = rxring->cur = nm_ring_next(rxring, cur); - rx++; - if (txavail == 0) - continue; - dst = NETMAP_BUF(txring, - txring->slot[txcur].buf_idx); - /* copy... */ - dpkt = (uint16_t *)dst; - spkt = (uint16_t *)src; - nm_pkt_copy(src, dst, slot->len); - dpkt[0] = spkt[3]; - dpkt[1] = spkt[4]; - dpkt[2] = spkt[5]; - dpkt[3] = spkt[0]; - dpkt[4] = spkt[1]; - dpkt[5] = spkt[2]; - txring->slot[txcur].len = slot->len; - /* XXX swap src dst mac */ - txcur = nm_ring_next(txring, txcur); - txavail--; - sent++; - } - } - txring->head = txring->cur = txcur; - targ->count = sent; -#ifdef BUSYWAIT - ioctl(pfd.fd, NIOCTXSYNC, NULL); -#endif - //D("tx %d rx %d", sent, rx); - } - return NULL; -} - -static __inline int -timespec_ge(const struct timespec *a, const struct timespec *b) -{ - - if (a->tv_sec > b->tv_sec) - return (1); - if (a->tv_sec < b->tv_sec) - return (0); - if (a->tv_nsec >= b->tv_nsec) - return (1); - return (0); -} - -static __inline struct timespec -timeval2spec(const struct timeval *a) -{ - struct timespec ts = { - .tv_sec = a->tv_sec, - .tv_nsec = a->tv_usec * 1000 - }; - return ts; -} - -static __inline struct timeval -timespec2val(const struct timespec *a) -{ - struct timeval tv = { - .tv_sec = a->tv_sec, - .tv_usec = a->tv_nsec / 1000 - }; - return tv; -} - - -static __inline struct timespec -timespec_add(struct timespec a, struct timespec b) -{ - struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; - if (ret.tv_nsec >= 1000000000) { - ret.tv_sec++; - ret.tv_nsec -= 1000000000; - } - return ret; -} - -static __inline struct timespec -timespec_sub(struct timespec a, struct timespec b) -{ - struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; - if (ret.tv_nsec < 0) { - ret.tv_sec--; - ret.tv_nsec += 1000000000; - } - return ret; -} - - -/* - * wait until ts, either busy or sleeping if more than 1ms. - * Return wakeup time. 
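 *
 * A hedged usage sketch, mirroring the rate-limited path of sender_body()
 * below (numbers are illustrative): with -R 1000000 and the default burst
 * of 512, main() sets
 *
 *	tx_period = 10^9 * burst / rate = 1000000000 * 512 / 1000000 = 512000 ns
 *
 * and each batch then does
 *
 *	nexttime = timespec_add(nexttime, tx_period);
 *	wait_time(nexttime);
 *
 * so the sender busy-waits only for the sub-millisecond remainder.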
- */ -static struct timespec -wait_time(struct timespec ts) -{ - for (;;) { - struct timespec w, cur; - clock_gettime(CLOCK_REALTIME_PRECISE, &cur); - w = timespec_sub(ts, cur); - if (w.tv_sec < 0) - return cur; - else if (w.tv_sec > 0 || w.tv_nsec > 1000000) - poll(NULL, 0, 1); - } -} - -static void * -sender_body(void *data) -{ - struct targ *targ = (struct targ *) data; - struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; - struct netmap_if *nifp; - struct netmap_ring *txring; - int i, n = targ->g->npackets / targ->g->nthreads; - int64_t sent = 0; - int options = targ->g->options | OPT_COPY; - struct timespec nexttime = { 0, 0}; // XXX silence compiler - int rate_limit = targ->g->tx_rate; - struct pkt *pkt = &targ->pkt; - void *frame; - int size; - - frame = pkt; - frame += sizeof(pkt->vh) - targ->g->virt_header; - size = targ->g->pkt_size + targ->g->virt_header; - - D("start"); - if (setaffinity(targ->thread, targ->affinity)) - goto quit; - - /* main loop.*/ - clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); - if (rate_limit) { - targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); - targ->tic.tv_nsec = 0; - wait_time(targ->tic); - nexttime = targ->tic; - } - if (targ->g->dev_type == DEV_TAP) { - D("writing to file desc %d", targ->g->main_fd); - - for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { - if (write(targ->g->main_fd, frame, size) != -1) - sent++; - update_addresses(pkt, targ->g); - if (i > 10000) { - targ->count = sent; - i = 0; - } - } -#ifndef NO_PCAP - } else if (targ->g->dev_type == DEV_PCAP) { - pcap_t *p = targ->g->p; - - for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { - if (pcap_inject(p, frame, size) != -1) - sent++; - update_addresses(pkt, targ->g); - if (i > 10000) { - targ->count = sent; - i = 0; - } - } -#endif /* NO_PCAP */ - } else { - int tosend = 0; - int frags = targ->g->frags; - - nifp = targ->nmd->nifp; - while (!targ->cancel && (n == 0 || sent < n)) { - - if (rate_limit && tosend <= 0) { - tosend = targ->g->burst; - nexttime = timespec_add(nexttime, targ->g->tx_period); - wait_time(nexttime); - } - - /* - * wait for available room in the send queue(s) - */ - if (poll(&pfd, 1, 2000) <= 0) { - if (targ->cancel) - break; - D("poll error/timeout on queue %d: %s", targ->me, - strerror(errno)); - // goto quit; - } - if (pfd.revents & POLLERR) { - D("poll error"); - goto quit; - } - /* - * scan our queues and send on those with room - */ - if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { - D("drop copy"); - options &= ~OPT_COPY; - } - for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { - int m, limit = rate_limit ? tosend : targ->g->burst; - if (n > 0 && n - sent < limit) - limit = n - sent; - txring = NETMAP_TXRING(nifp, i); - if (nm_ring_empty(txring)) - continue; - if (frags > 1) - limit = ((limit + frags - 1) / frags) * frags; - - m = send_packets(txring, pkt, frame, size, targ->g, - limit, options, frags); - ND("limit %d tail %d frags %d m %d", - limit, txring->tail, frags, m); - sent += m; - targ->count = sent; - if (rate_limit) { - tosend -= m; - if (tosend <= 0) - break; - } - } - } - /* flush any remaining packets */ - ioctl(pfd.fd, NIOCTXSYNC, NULL); - - /* final part: wait all the TX queues to be empty. 
*/ - for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { - txring = NETMAP_TXRING(nifp, i); - while (nm_tx_pending(txring)) { - ioctl(pfd.fd, NIOCTXSYNC, NULL); - usleep(1); /* wait 1 tick */ - } - } - } /* end DEV_NETMAP */ - - clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); - targ->completed = 1; - targ->count = sent; - -quit: - /* reset the ``used`` flag. */ - targ->used = 0; - - return (NULL); -} - - -#ifndef NO_PCAP -static void -receive_pcap(u_char *user, const struct pcap_pkthdr * h, - const u_char * bytes) -{ - int *count = (int *)user; - (void)h; /* UNUSED */ - (void)bytes; /* UNUSED */ - (*count)++; -} -#endif /* !NO_PCAP */ - -static int -receive_packets(struct netmap_ring *ring, u_int limit, int dump) -{ - u_int cur, rx, n; - - cur = ring->cur; - n = nm_ring_space(ring); - if (n < limit) - limit = n; - for (rx = 0; rx < limit; rx++) { - struct netmap_slot *slot = &ring->slot[cur]; - char *p = NETMAP_BUF(ring, slot->buf_idx); - - if (dump) - dump_payload(p, slot->len, ring, cur); - - cur = nm_ring_next(ring, cur); - } - ring->head = ring->cur = cur; - - return (rx); -} - -static void * -receiver_body(void *data) -{ - struct targ *targ = (struct targ *) data; - struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; - struct netmap_if *nifp; - struct netmap_ring *rxring; - int i; - uint64_t received = 0; - - if (setaffinity(targ->thread, targ->affinity)) - goto quit; - - /* unbounded wait for the first packet. */ - for (;;) { - i = poll(&pfd, 1, 1000); - if (i > 0 && !(pfd.revents & POLLERR)) - break; - RD(1, "waiting for initial packets, poll returns %d %d", - i, pfd.revents); - } - - /* main loop, exit after 1s silence */ - clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); - if (targ->g->dev_type == DEV_TAP) { - D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd); - while (!targ->cancel) { - char buf[2048]; - /* XXX should we poll ? */ - if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) - targ->count++; - } -#ifndef NO_PCAP - } else if (targ->g->dev_type == DEV_PCAP) { - while (!targ->cancel) { - /* XXX should we poll ? */ - pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); - } -#endif /* !NO_PCAP */ - } else { - int dump = targ->g->options & OPT_DUMP; - - nifp = targ->nmd->nifp; - while (!targ->cancel) { - /* Once we started to receive packets, wait at most 1 seconds - before quitting. */ - if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { - clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); - targ->toc.tv_sec -= 1; /* Subtract timeout time. */ - goto out; - } - - if (pfd.revents & POLLERR) { - D("poll err"); - goto quit; - } - - for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { - int m; - - rxring = NETMAP_RXRING(nifp, i); - if (nm_ring_empty(rxring)) - continue; - - m = receive_packets(rxring, targ->g->burst, dump); - received += m; - } - targ->count = received; - } - } - - clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); - -out: - targ->completed = 1; - targ->count = received; - -quit: - /* reset the ``used`` flag. */ - targ->used = 0; - - return (NULL); -} - -/* very crude code to print a number in normalized form. - * Caller has to make sure that the buffer is large enough. 
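 *
 * Example (40 bytes, as the callers below allocate, is plenty):
 *
 *	char buf[40];
 *	norm(buf, 14880000.0);
 *
 * leaves "14.88 M" in buf.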
- */ -static const char * -norm(char *buf, double val) -{ - char *units[] = { "", "K", "M", "G", "T" }; - u_int i; - - for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) - val /= 1000; - sprintf(buf, "%.2f %s", val, units[i]); - return buf; -} - -static void -tx_output(uint64_t sent, int size, double delta) -{ - double bw, raw_bw, pps; - char b1[40], b2[80], b3[80]; - - printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", - (unsigned long long)sent, size, delta); - if (delta == 0) - delta = 1e-6; - if (size < 60) /* correct for min packet size */ - size = 60; - pps = sent / delta; - bw = (8.0 * size * sent) / delta; - /* raw packets have4 bytes crc + 20 bytes framing */ - raw_bw = (8.0 * (size + 24) * sent) / delta; - - printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", - norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); -} - - -static void -rx_output(uint64_t received, double delta) -{ - double pps; - char b1[40]; - - printf("Received %llu packets, in %.2f seconds.\n", - (unsigned long long) received, delta); - - if (delta == 0) - delta = 1e-6; - pps = received / delta; - printf("Speed: %spps\n", norm(b1, pps)); -} - -static void -usage(void) -{ - const char *cmd = "pkt-gen"; - fprintf(stderr, - "Usage:\n" - "%s arguments\n" - "\t-i interface interface name\n" - "\t-f function tx rx ping pong\n" - "\t-n count number of iterations (can be 0)\n" - "\t-t pkts_to_send also forces tx mode\n" - "\t-r pkts_to_receive also forces rx mode\n" - "\t-l pkt_size in bytes excluding CRC\n" - "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" - "\t-s src_ip[:port[-src_ip:port]] single or range\n" - "\t-D dst-mac\n" - "\t-S src-mac\n" - "\t-a cpu_id use setaffinity\n" - "\t-b burst size testing, mostly\n" - "\t-c cores cores to use\n" - "\t-p threads processes/threads to use\n" - "\t-T report_ms milliseconds between reports\n" - "\t-P use libpcap instead of netmap\n" - "\t-w wait_for_link_time in seconds\n" - "\t-R rate in packets per second\n" - "\t-X dump payload\n" - "\t-H len add empty virtio-net-header with size 'len'\n" - "", - cmd); - - exit(0); -} - -static void -start_threads(struct glob_arg *g) -{ - int i; - - targs = calloc(g->nthreads, sizeof(*targs)); - /* - * Now create the desired number of threads, each one - * using a single descriptor. - */ - for (i = 0; i < g->nthreads; i++) { - struct targ *t = &targs[i]; - - bzero(t, sizeof(*t)); - t->fd = -1; /* default, with pcap */ - t->g = g; - - if (g->dev_type == DEV_NETMAP) { - struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */ - - if (g->nthreads > 1) { - if (nmd.req.nr_flags != NR_REG_ALL_NIC) { - D("invalid nthreads mode %d", nmd.req.nr_flags); - continue; - } - nmd.req.nr_flags = NR_REG_ONE_NIC; - nmd.req.nr_ringid = i; - } - /* Only touch one of the rings (rx is already ok) */ - if (g->td_body == receiver_body) - nmd.req.nr_ringid |= NETMAP_NO_TX_POLL; - - /* register interface. Override ifname and ringid etc. 
*/ - - t->nmd = nm_open(t->g->ifname, NULL, g->nmd_flags | - NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, g->nmd); - if (t->nmd == NULL) { - D("Unable to open %s: %s", - t->g->ifname, strerror(errno)); - continue; - } - t->fd = t->nmd->fd; - - } else { - targs[i].fd = g->main_fd; - } - t->used = 1; - t->me = i; - if (g->affinity >= 0) { - if (g->affinity < g->cpus) - t->affinity = g->affinity; - else - t->affinity = i % g->cpus; - } else { - t->affinity = -1; - } - /* default, init packets */ - initialize_packet(t); - - if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { - D("Unable to create thread %d: %s", i, strerror(errno)); - t->used = 0; - } - } -} - -static void -main_thread(struct glob_arg *g) -{ - int i; - - uint64_t prev = 0; - uint64_t count = 0; - double delta_t; - struct timeval tic, toc; - - gettimeofday(&toc, NULL); - for (;;) { - struct timeval now, delta; - uint64_t pps, usec, my_count, npkts; - int done = 0; - - delta.tv_sec = g->report_interval/1000; - delta.tv_usec = (g->report_interval%1000)*1000; - select(0, NULL, NULL, NULL, &delta); - gettimeofday(&now, NULL); - timersub(&now, &toc, &toc); - my_count = 0; - for (i = 0; i < g->nthreads; i++) { - my_count += targs[i].count; - if (targs[i].used == 0) - done++; - } - usec = toc.tv_sec* 1000000 + toc.tv_usec; - if (usec < 10000) - continue; - npkts = my_count - prev; - pps = (npkts*1000000 + usec/2) / usec; - D("%llu pps (%llu pkts in %llu usec)", - (unsigned long long)pps, - (unsigned long long)npkts, - (unsigned long long)usec); - prev = my_count; - toc = now; - if (done == g->nthreads) - break; - } - - timerclear(&tic); - timerclear(&toc); - for (i = 0; i < g->nthreads; i++) { - struct timespec t_tic, t_toc; - /* - * Join active threads, unregister interfaces and close - * file descriptors. - */ - if (targs[i].used) - pthread_join(targs[i].thread, NULL); - close(targs[i].fd); - - if (targs[i].completed == 0) - D("ouch, thread %d exited with error", i); - - /* - * Collect threads output and extract information about - * how long it took to send all the packets. - */ - count += targs[i].count; - t_tic = timeval2spec(&tic); - t_toc = timeval2spec(&toc); - if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) - tic = timespec2val(&targs[i].tic); - if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) - toc = timespec2val(&targs[i].toc); - } - - /* print output. */ - timersub(&toc, &tic, &toc); - delta_t = toc.tv_sec + 1e-6* toc.tv_usec; - if (g->td_body == sender_body) - tx_output(count, g->pkt_size, delta_t); - else - rx_output(count, delta_t); - - if (g->dev_type == DEV_NETMAP) { - munmap(g->nmd->mem, g->nmd->req.nr_memsize); - close(g->main_fd); - } -} - - -struct sf { - char *key; - void *f; -}; - -static struct sf func[] = { - { "tx", sender_body }, - { "rx", receiver_body }, - { "ping", pinger_body }, - { "pong", ponger_body }, - { NULL, NULL } -}; - -static int -tap_alloc(char *dev) -{ - struct ifreq ifr; - int fd, err; - char *clonedev = TAP_CLONEDEV; - - (void)err; - (void)dev; - /* Arguments taken by the function: - * - * char *dev: the name of an interface (or '\0'). MUST have enough - * space to hold the interface name if '\0' is passed - * int flags: interface flags (eg, IFF_TUN etc.) 
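 *
 * A minimal usage sketch (the name is illustrative; this version takes
 * only the buffer, no flags argument): pass a writable buffer with the
 * desired name and, on Linux, read back the name the kernel assigned:
 *
 *	char dev[IFNAMSIZ] = "tap0";
 *	int fd = tap_alloc(dev);
 *
 * A negative fd means the clone device could not be opened or configured.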
- */ - -#ifdef __FreeBSD__ - if (dev[3]) { /* tapSomething */ - static char buf[128]; - snprintf(buf, sizeof(buf), "/dev/%s", dev); - clonedev = buf; - } -#endif - /* open the device */ - if( (fd = open(clonedev, O_RDWR)) < 0 ) { - return fd; - } - D("%s open successful", clonedev); - - /* preparation of the struct ifr, of type "struct ifreq" */ - memset(&ifr, 0, sizeof(ifr)); - -#ifdef linux - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - - if (*dev) { - /* if a device name was specified, put it in the structure; otherwise, - * the kernel will try to allocate the "next" device of the - * specified type */ - strncpy(ifr.ifr_name, dev, IFNAMSIZ); - } - - /* try to create the device */ - if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { - D("failed to to a TUNSETIFF: %s", strerror(errno)); - close(fd); - return err; - } - - /* if the operation was successful, write back the name of the - * interface to the variable "dev", so the caller can know - * it. Note that the caller MUST reserve space in *dev (see calling - * code below) */ - strcpy(dev, ifr.ifr_name); - D("new name is %s", dev); -#endif /* linux */ - - /* this is the special file descriptor that the caller will use to talk - * with the virtual interface */ - return fd; -} - -int -main(int arc, char **argv) -{ - int i; - - struct glob_arg g; - - int ch; - int wait_link = 2; - int devqueues = 1; /* how many device queues */ - - bzero(&g, sizeof(g)); - - g.main_fd = -1; - g.td_body = receiver_body; - g.report_interval = 1000; /* report interval */ - g.affinity = -1; - /* ip addresses can also be a range x.x.x.x-x.x.x.y */ - g.src_ip.name = "10.0.0.1"; - g.dst_ip.name = "10.1.0.1"; - g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; - g.src_mac.name = NULL; - g.pkt_size = 60; - g.burst = 512; // default - g.nthreads = 1; - g.cpus = 1; - g.forever = 1; - g.tx_rate = 0; - g.frags = 1; - g.nmr_config = ""; - g.virt_header = 0; - - while ( (ch = getopt(arc, argv, - "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:")) != -1) { - struct sf *fn; - - switch(ch) { - default: - D("bad option %c %s", ch, optarg); - usage(); - break; - - case 'n': - g.npackets = atoi(optarg); - break; - - case 'F': - i = atoi(optarg); - if (i < 1 || i > 63) { - D("invalid frags %d [1..63], ignore", i); - break; - } - g.frags = i; - break; - - case 'f': - for (fn = func; fn->key; fn++) { - if (!strcmp(fn->key, optarg)) - break; - } - if (fn->key) - g.td_body = fn->f; - else - D("unrecognised function %s", optarg); - break; - - case 'o': /* data generation options */ - g.options = atoi(optarg); - break; - - case 'a': /* force affinity */ - g.affinity = atoi(optarg); - break; - - case 'i': /* interface */ - /* a prefix of tap: netmap: or pcap: forces the mode. 
- * otherwise we guess - */ - D("interface is %s", optarg); - if (strlen(optarg) > MAX_IFNAMELEN - 8) { - D("ifname too long %s", optarg); - break; - } - strcpy(g.ifname, optarg); - if (!strcmp(optarg, "null")) { - g.dev_type = DEV_NETMAP; - g.dummy_send = 1; - } else if (!strncmp(optarg, "tap:", 4)) { - g.dev_type = DEV_TAP; - strcpy(g.ifname, optarg + 4); - } else if (!strncmp(optarg, "pcap:", 5)) { - g.dev_type = DEV_PCAP; - strcpy(g.ifname, optarg + 5); - } else if (!strncmp(optarg, "netmap:", 7) || - !strncmp(optarg, "vale", 4)) { - g.dev_type = DEV_NETMAP; - } else if (!strncmp(optarg, "tap", 3)) { - g.dev_type = DEV_TAP; - } else { /* prepend netmap: */ - g.dev_type = DEV_NETMAP; - sprintf(g.ifname, "netmap:%s", optarg); - } - break; - - case 'I': - g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ - break; - - case 'l': /* pkt_size */ - g.pkt_size = atoi(optarg); - break; - - case 'd': - g.dst_ip.name = optarg; - break; - - case 's': - g.src_ip.name = optarg; - break; - - case 'T': /* report interval */ - g.report_interval = atoi(optarg); - break; - - case 'w': - wait_link = atoi(optarg); - break; - - case 'W': /* XXX changed default */ - g.forever = 0; /* do not exit rx even with no traffic */ - break; - - case 'b': /* burst */ - g.burst = atoi(optarg); - break; - case 'c': - g.cpus = atoi(optarg); - break; - case 'p': - g.nthreads = atoi(optarg); - break; - - case 'D': /* destination mac */ - g.dst_mac.name = optarg; - break; - - case 'S': /* source mac */ - g.src_mac.name = optarg; - break; - case 'v': - verbose++; - break; - case 'R': - g.tx_rate = atoi(optarg); - break; - case 'X': - g.options |= OPT_DUMP; - break; - case 'C': - g.nmr_config = strdup(optarg); - break; - case 'H': - g.virt_header = atoi(optarg); - break; - case 'e': /* extra bufs */ - g.extra_bufs = atoi(optarg); - break; - } - } - - if (g.ifname == NULL) { - D("missing ifname"); - usage(); - } - - i = system_ncpus(); - if (g.cpus < 0 || g.cpus > i) { - D("%d cpus is too high, have only %d cpus", g.cpus, i); - usage(); - } - if (g.cpus == 0) - g.cpus = i; - - if (g.pkt_size < 16 || g.pkt_size > 1536) { - D("bad pktsize %d\n", g.pkt_size); - usage(); - } - - if (g.src_mac.name == NULL) { - static char mybuf[20] = "00:00:00:00:00:00"; - /* retrieve source mac address. 
*/ - if (source_hwaddr(g.ifname, mybuf) == -1) { - D("Unable to retrieve source mac"); - // continue, fail later - } - g.src_mac.name = mybuf; - } - /* extract address ranges */ - extract_ip_range(&g.src_ip); - extract_ip_range(&g.dst_ip); - extract_mac_range(&g.src_mac); - extract_mac_range(&g.dst_mac); - - if (g.src_ip.start != g.src_ip.end || - g.src_ip.port0 != g.src_ip.port1 || - g.dst_ip.start != g.dst_ip.end || - g.dst_ip.port0 != g.dst_ip.port1) - g.options |= OPT_COPY; - - if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 - && g.virt_header != VIRT_HDR_2) { - D("bad virtio-net-header length"); - usage(); - } - - if (g.dev_type == DEV_TAP) { - D("want to use tap %s", g.ifname); - g.main_fd = tap_alloc(g.ifname); - if (g.main_fd < 0) { - D("cannot open tap %s", g.ifname); - usage(); - } -#ifndef NO_PCAP - } else if (g.dev_type == DEV_PCAP) { - char pcap_errbuf[PCAP_ERRBUF_SIZE]; - - D("using pcap on %s", g.ifname); - pcap_errbuf[0] = '\0'; // init the buffer - g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf); - if (g.p == NULL) { - D("cannot open pcap on %s", g.ifname); - usage(); - } -#endif /* !NO_PCAP */ - } else if (g.dummy_send) { /* but DEV_NETMAP */ - D("using a dummy send routine"); - } else { - struct nm_desc base_nmd; - - bzero(&base_nmd, sizeof(base_nmd)); - - g.nmd_flags = 0; - g.nmd_flags |= parse_nmr_config(g.nmr_config, &base_nmd.req); - if (g.extra_bufs) { - base_nmd.req.nr_arg3 = g.extra_bufs; - g.nmd_flags |= NM_OPEN_ARG3; - } - - /* - * Open the netmap device using nm_open(). - * - * protocol stack and may cause a reset of the card, - * which in turn may take some time for the PHY to - * reconfigure. We do the open here to have time to reset. - */ - g.nmd = nm_open(g.ifname, NULL, g.nmd_flags, &base_nmd); - if (g.nmd == NULL) { - D("Unable to open %s: %s", g.ifname, strerror(errno)); - goto out; - } - g.main_fd = g.nmd->fd; - D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); - - devqueues = g.nmd->req.nr_rx_rings; - - /* validate provided nthreads. */ - if (g.nthreads < 1 || g.nthreads > devqueues) { - D("bad nthreads %d, have %d queues", g.nthreads, devqueues); - // continue, fail later - } - - if (verbose) { - struct netmap_if *nifp = g.nmd->nifp; - struct nmreq *req = &g.nmd->req; - - D("nifp at offset %d, %d tx %d rx region %d", - req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, - req->nr_arg2); - for (i = 0; i <= req->nr_tx_rings; i++) { - D(" TX%d at 0x%lx", i, - (char *)NETMAP_TXRING(nifp, i) - (char *)nifp); - } - for (i = 0; i <= req->nr_rx_rings; i++) { - D(" RX%d at 0x%lx", i, - (char *)NETMAP_RXRING(nifp, i) - (char *)nifp); - } - } - - /* Print some debug information. */ - fprintf(stdout, - "%s %s: %d queues, %d threads and %d cpus.\n", - (g.td_body == sender_body) ? "Sending on" : "Receiving from", - g.ifname, - devqueues, - g.nthreads, - g.cpus); - if (g.td_body == sender_body) { - fprintf(stdout, "%s -> %s (%s -> %s)\n", - g.src_ip.name, g.dst_ip.name, - g.src_mac.name, g.dst_mac.name); - } - -out: - /* Exit if something went wrong. */ - if (g.main_fd < 0) { - D("aborting"); - usage(); - } - } - - - if (g.options) { - D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", - g.options & OPT_PREFETCH ? " prefetch" : "", - g.options & OPT_ACCESS ? " access" : "", - g.options & OPT_MEMCPY ? " memcpy" : "", - g.options & OPT_INDIRECT ? " indirect" : "", - g.options & OPT_COPY ? 
" copy" : ""); - } - - g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; - if (g.tx_rate > 0) { - /* try to have at least something every second, - * reducing the burst size to some 0.01s worth of data - * (but no less than one full set of fragments) - */ - uint64_t x; - int lim = (g.tx_rate)/300; - if (g.burst > lim) - g.burst = lim; - if (g.burst < g.frags) - g.burst = g.frags; - x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; - g.tx_period.tv_nsec = x; - g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; - g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; - } - if (g.td_body == sender_body) - D("Sending %d packets every %ld.%09ld s", - g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); - /* Wait for PHY reset. */ - D("Wait %d secs for phy reset", wait_link); - sleep(wait_link); - D("Ready..."); - - /* Install ^C handler. */ - global_nthreads = g.nthreads; - signal(SIGINT, sigint_h); - - start_threads(&g); - main_thread(&g); - return 0; -} - -/* end of file */ diff --git a/netmap/examples/test_select.c b/netmap/examples/test_select.c deleted file mode 100644 index ed737c3..0000000 --- a/netmap/examples/test_select.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * test minimum select time - * - * ./prog usec [method [duration]] - */ - -#include -#include -#include -#include -#include -#include -#include - -enum { M_SELECT =0 , M_POLL, M_USLEEP }; -static const char *names[] = { "select", "poll", "usleep" }; -int -main(int argc, char *argv[]) -{ - struct timeval ta, tb, prev; - int usec = 1, total = 0, method = M_SELECT; - uint32_t *vals = NULL; - uint32_t i, count = 0; -#define LIM 1000000 - - if (argc > 1) - usec = atoi(argv[1]); - if (usec <= 0) - usec = 1; - else if (usec > 500000) - usec = 500000; - if (argc > 2) { - if (!strcmp(argv[2], "poll")) - method = M_POLL; - else if (!strcmp(argv[2], "usleep")) - method = M_USLEEP; - } - if (argc > 3) - total = atoi(argv[3]); - if (total < 1) - total = 1; - else if (total > 10) - total = 10; - fprintf(stderr, "testing %s for %dus over %ds\n", - names[method], usec, total); - - gettimeofday(&ta, NULL); - prev = ta; - vals = calloc(LIM, sizeof(uint32_t)); - for (;;) { - if (method == M_SELECT) { - struct timeval to = { 0, usec }; - select(0, NULL, NULL, NULL, &to); - } else if (method == M_POLL) { - poll(NULL, 0, usec/1000); - } else { - usleep(usec); - } - gettimeofday(&tb, NULL); - timersub(&tb, &prev, &prev); - if (count < LIM) - vals[count] = prev.tv_usec; - count++; - prev = tb; - timersub(&tb, &ta, &tb); - if (tb.tv_sec > total) - break; - } - fprintf(stderr, "%dus actually took %dus\n", - usec, (int)(tb.tv_sec * 1000000 + tb.tv_usec) / count ); - for (i = 0; i < count && i < LIM; i++) - fprintf(stdout, "%d\n", vals[i]); - return 0; -} diff --git a/netmap/examples/testlock.c b/netmap/examples/testlock.c deleted file mode 100644 index b2cb073..0000000 --- a/netmap/examples/testlock.c +++ /dev/null @@ -1,924 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id$ - * - * Test program to study various ops and concurrency issues. - * Create multiple threads, possibly bind to cpus, and run a workload. - * - * cc -O2 -Werror -Wall testlock.c -o testlock -lpthread - * you might need -lrt - */ - -#include -#include -#include /* pthread_* */ - -#if defined(__APPLE__) - -#include -#include -#define atomic_add_int(p, n) OSAtomicAdd32(n, (int *)p) -#define atomic_cmpset_32(p, o, n) OSAtomicCompareAndSwap32(o, n, (int *)p) - -#elif defined(linux) - -int atomic_cmpset_32(volatile uint32_t *p, uint32_t old, uint32_t new) -{ - int ret = *p == old; - *p = new; - return ret; -} - -#if defined(HAVE_GCC_ATOMICS) -int atomic_add_int(volatile int *p, int v) -{ - return __sync_fetch_and_add(p, v); -} -#else -inline -uint32_t atomic_add_int(uint32_t *p, int v) -{ - __asm __volatile ( - " lock xaddl %0, %1 ; " - : "+r" (v), /* 0 (result) */ - "=m" (*p) /* 1 */ - : "m" (*p)); /* 2 */ - return (v); -} -#endif - -#else /* FreeBSD */ -#include -#include -#include /* pthread w/ affinity */ - -#if __FreeBSD_version > 500000 -#include /* cpu_set */ -#if __FreeBSD_version > 800000 -#define HAVE_AFFINITY -#endif - - -#else /* FreeBSD 4.x */ -int atomic_cmpset_32(volatile uint32_t *p, uint32_t old, uint32_t new) -{ - int ret = *p == old; - *p = new; - return ret; -} - -#define PRIu64 "llu" -#endif /* FreeBSD 4.x */ - -#endif /* FreeBSD */ - -#include /* signal */ -#include -#include -#include -#include /* PRI* macros */ -#include /* strcmp */ -#include /* open */ -#include /* getopt */ - - -#include /* sysctl */ -#include /* timersub */ - -#define ONE_MILLION 1000000 -/* debug support */ -#define ND(format, ...) -#define D(format, ...) 
\ - fprintf(stderr, "%s [%d] " format "\n", \ - __FUNCTION__, __LINE__, ##__VA_ARGS__) - -int verbose = 0; - -#if 1//def MY_RDTSC -/* Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline */ -#define my_rdtsc(t) \ - do { \ - u_int __regs[4]; \ - \ - do_cpuid(0, __regs); \ - (t) = rdtsc(); \ - } while (0) - -static __inline void -do_cpuid(u_int ax, u_int *p) -{ - __asm __volatile("cpuid" - : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (ax) ); -} - -static __inline uint64_t -rdtsc(void) -{ - uint64_t rv; - - // XXX does not work on linux-64 bit - __asm __volatile("rdtscp" : "=A" (rv) : : "%rax"); - return (rv); -} -#endif /* 1 */ - -struct targ; - -/*** global arguments for all threads ***/ -struct glob_arg { - struct { - uint32_t ctr[1024]; - } v __attribute__ ((aligned(256) )); - int64_t m_cycles; /* total cycles */ - int nthreads; - int cpus; - int privs; // 1 if has IO privileges - int arg; // microseconds in usleep - int nullfd; // open(/dev/null) - char *test_name; - void (*fn)(struct targ *); - uint64_t scale; // scaling factor - char *scale_name; // scaling factor -}; - -/* - * Arguments for a new thread. - */ -struct targ { - struct glob_arg *g; - int completed; - u_int *glob_ctr; - uint64_t volatile count; - struct timeval tic, toc; - int me; - pthread_t thread; - int affinity; -}; - - -static struct targ *ta; -static int global_nthreads; - -/* control-C handler */ -static void -sigint_h(int sig) -{ - int i; - - (void)sig; /* UNUSED */ - for (i = 0; i < global_nthreads; i++) { - /* cancel active threads. */ - if (ta[i].completed) - continue; - D("Cancelling thread #%d\n", i); - pthread_cancel(ta[i].thread); - ta[i].completed = 0; - } - signal(SIGINT, SIG_DFL); -} - - -/* sysctl wrapper to return the number of active CPUs */ -static int -system_ncpus(void) -{ -#ifdef linux - return 1; -#else - int mib[2] = { CTL_HW, HW_NCPU}, ncpus; - size_t len = sizeof(mib); - sysctl(mib, len / sizeof(mib[0]), &ncpus, &len, NULL, 0); - D("system had %d cpus", ncpus); - - return (ncpus); -#endif -} - -/* - * try to get I/O privileges so we can execute cli/sti etc. - */ -int -getprivs(void) -{ - int fd = open("/dev/io", O_RDWR); - if (fd < 0) { - D("cannot open /dev/io, fd %d", fd); - return 0; - } - return 1; -} - -/* set the thread affinity. */ -/* ARGSUSED */ -#ifdef HAVE_AFFINITY -static int -setaffinity(pthread_t me, int i) -{ - cpuset_t cpumask; - - if (i == -1) - return 0; - - /* Set thread affinity affinity.*/ - CPU_ZERO(&cpumask); - CPU_SET(i, &cpumask); - - if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { - D("Unable to set affinity"); - return 1; - } - return 0; -} -#endif - - -static void * -td_body(void *data) -{ - struct targ *t = (struct targ *) data; - -#ifdef HAVE_AFFINITY - if (0 == setaffinity(t->thread, t->affinity)) -#endif - { - /* main loop.*/ - D("testing %"PRIu64" cycles arg %d", - t->g->m_cycles, t->g->arg); - gettimeofday(&t->tic, NULL); - t->g->fn(t); - gettimeofday(&t->toc, NULL); - } - t->completed = 1; - return (NULL); -} - -/* - * select and poll: - * arg fd timeout - * >0 block >0 - * 0 block 0 - * block NULL (not implemented) - * < -2 ready -arg - * -1 ready 0 - * -2 ready NULL / <0 for poll - * - * arg = -1 -> NULL timeout (select) - */ -void -test_sel(struct targ *t) -{ - int arg = t->g->arg; - // stdin is blocking on reads /dev/null or /dev/zero are not - int fd = (arg < 0) ? t->g->nullfd : 0; - fd_set r; - struct timeval t0 = { 0, arg}; - struct timeval tcur, *tp = (arg == -2) ? 
NULL : &tcur; - int64_t m; - - if (arg == -1) - t0.tv_usec = 0; - else if (arg < -2) - t0.tv_usec = -arg; - - D("tp %p mode %s timeout %d", tp, arg < 0 ? "ready" : "block", - (int)t0.tv_usec); - for (m = 0; m < t->g->m_cycles; m++) { - int ret; - tcur = t0; - FD_ZERO(&r); - FD_SET(fd, &r); - ret = select(fd+1, &r, NULL, NULL, tp); - (void)ret; - ND("ret %d r %d w %d", ret, - FD_ISSET(fd, &r), - FD_ISSET(fd, &w)); - t->count++; - } -} - -void -test_poll(struct targ *t) -{ - int arg = t->g->arg; - // stdin is blocking on reads /dev/null is not - int fd = (arg < 0) ? t->g->nullfd : 0; - int64_t m; - int ms; - - if (arg == -1) - ms = 0; - else if (arg == -2) - ms = -1; /* blocking */ - else if (arg < 0) - ms = -arg/1000; - else - ms = arg/1000; - - D("mode %s timeout %d", arg < 0 ? "ready" : "block", ms); - for (m = 0; m < t->g->m_cycles; m++) { - struct pollfd x; - x.fd = fd; - x.events = POLLIN; - poll(&x, 1, ms); - t->count++; - } -} - -void -test_usleep(struct targ *t) -{ - int64_t m; - for (m = 0; m < t->g->m_cycles; m++) { - usleep(t->g->arg); - t->count++; - } -} - -void -test_cli(struct targ *t) -{ - int64_t m, i; - if (!t->g->privs) { - D("%s", "privileged instructions not available"); - return; - } - for (m = 0; m < t->g->m_cycles; m++) { - for (i = 0; i < ONE_MILLION; i++) { - __asm __volatile("cli;"); - __asm __volatile("and %eax, %eax;"); - __asm __volatile("sti;"); - t->count++; - } - } -} - -void -test_nop(struct targ *t) -{ - int64_t m, i; - for (m = 0; m < t->g->m_cycles; m++) { - for (i = 0; i < ONE_MILLION; i++) { - __asm __volatile("nop;"); - __asm __volatile("nop; nop; nop; nop; nop;"); - //__asm __volatile("nop; nop; nop; nop; nop;"); - t->count++; - } - } -} - -void -test_rdtsc1(struct targ *t) -{ - int64_t m, i; - uint64_t v; - (void)v; - for (m = 0; m < t->g->m_cycles; m++) { - for (i = 0; i < ONE_MILLION; i++) { - my_rdtsc(v); - t->count++; - } - } -} - -void -test_rdtsc(struct targ *t) -{ - int64_t m, i; - volatile uint64_t v; - for (m = 0; m < t->g->m_cycles; m++) { - for (i = 0; i < ONE_MILLION; i++) { - v = rdtsc(); - t->count++; - } - } - (void)v; -} - -void -test_add(struct targ *t) -{ - int64_t m, i; - for (m = 0; m < t->g->m_cycles; m++) { - for (i = 0; i < ONE_MILLION; i++) { - t->glob_ctr[0] ++; - t->count++; - } - } -} - -void -test_atomic_add(struct targ *t) -{ - int64_t m, i; - for (m = 0; m < t->g->m_cycles; m++) { - for (i = 0; i < ONE_MILLION; i++) { - atomic_add_int(t->glob_ctr, 1); - t->count++; - } - } -} - -void -test_atomic_cmpset(struct targ *t) -{ - int64_t m, i; - for (m = 0; m < t->g->m_cycles; m++) { - for (i = 0; i < ONE_MILLION; i++) { - atomic_cmpset_32(t->glob_ctr, m, i); - t->count++; - } - } -} - -void -test_time(struct targ *t) -{ - int64_t m; - for (m = 0; m < t->g->m_cycles; m++) { -#ifndef __APPLE__ - struct timespec ts; - clock_gettime(t->g->arg, &ts); -#endif - t->count++; - } -} - -void -test_gettimeofday(struct targ *t) -{ - int64_t m; - struct timeval ts; - for (m = 0; m < t->g->m_cycles; m++) { - gettimeofday(&ts, NULL); - t->count++; - } -} - -/* - * getppid is the simplest system call (getpid is cached by glibc - * so it would not be a good test) - */ -void -test_getpid(struct targ *t) -{ - int64_t m; - for (m = 0; m < t->g->m_cycles; m++) { - getppid(); - t->count++; - } -} - - -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - -static void -fast_bcopy(void *_src, void *_dst, int l) -{ - uint64_t *src = _src; - uint64_t *dst = _dst; - if (unlikely(l >= 1024)) { - bcopy(src, dst, 
l); - return; - } - for (; likely(l > 0); l-=64) { - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } -} - -static inline void -asmcopy(void *dst, void *src, uint64_t l) -{ - (void)dst; - (void)src; - asm( - "\n\t" - "movq %0, %%rcx\n\t" - "addq $7, %%rcx\n\t" - "shrq $03, %%rcx\n\t" - "cld\n\t" - "movq %1, %%rdi\n\t" - "movq %2, %%rsi\n\t" - "repe movsq\n\t" -/* "movq %0, %%rcx\n\t" - "andq $0x7, %%rcx\n\t" - "repe movsb\n\t" -*/ - : /* out */ - : "r" (l), "r" (dst), "r" (src) /* in */ - : "%rcx", "%rsi", "%rdi" /* clobbered */ - ); - -} -// XXX if you want to make sure there is no inlining... -// static void (*fp)(void *_src, void *_dst, int l) = fast_bcopy; - -#define HU 0x3ffff -static struct glob_arg huge[HU+1]; - -void -test_fastcopy(struct targ *t) -{ - int64_t m; - int len = t->g->arg; - - if (len > (int)sizeof(struct glob_arg)) - len = sizeof(struct glob_arg); - D("fast copying %d bytes", len); - for (m = 0; m < t->g->m_cycles; m++) { - fast_bcopy(t->g, (void *)&huge[m & HU], len); - t->count+=1; - } -} - -void -test_asmcopy(struct targ *t) -{ - int64_t m; - int len = t->g->arg; - - if (len > (int)sizeof(struct glob_arg)) - len = sizeof(struct glob_arg); - D("fast copying %d bytes", len); - for (m = 0; m < t->g->m_cycles; m++) { - asmcopy((void *)&huge[m & HU], t->g, len); - t->count+=1; - } -} - -void -test_bcopy(struct targ *t) -{ - int64_t m; - int len = t->g->arg; - - if (len > (int)sizeof(struct glob_arg)) - len = sizeof(struct glob_arg); - D("bcopying %d bytes", len); - for (m = 0; m < t->g->m_cycles; m++) { - bcopy(t->g, (void *)&huge[m & HU], len); - t->count+=1; - } -} - -void -test_builtin_memcpy(struct targ *t) -{ - int64_t m; - int len = t->g->arg; - - if (len > (int)sizeof(struct glob_arg)) - len = sizeof(struct glob_arg); - D("bcopying %d bytes", len); - for (m = 0; m < t->g->m_cycles; m++) { - __builtin_memcpy((void *)&huge[m & HU], t->g, len); - t->count+=1; - } -} - -void -test_memcpy(struct targ *t) -{ - int64_t m; - int len = t->g->arg; - - if (len > (int)sizeof(struct glob_arg)) - len = sizeof(struct glob_arg); - D("memcopying %d bytes", len); - for (m = 0; m < t->g->m_cycles; m++) { - memcpy((void *)&huge[m & HU], t->g, len); - t->count+=1; - } -} - -#include -#include // OSX -#include -#include -void -test_netmap(struct targ *t) -{ - struct nmreq nmr; - int fd; - int64_t m, scale; - - scale = t->g->m_cycles / 100; - fd = open("/dev/netmap", O_RDWR); - if (fd < 0) { - D("fail to open netmap, exit"); - return; - } - bzero(&nmr, sizeof(nmr)); - for (m = 0; m < t->g->m_cycles; m += scale) { - nmr.nr_version = 666; - nmr.nr_cmd = t->g->arg; - nmr.nr_offset = (uint32_t)scale; - ioctl(fd, NIOCGINFO, &nmr); - t->count += scale; - } - return; -} - -struct entry { - void (*fn)(struct targ *); - char *name; - uint64_t scale; - uint64_t m_cycles; -}; -struct entry tests[] = { - { test_sel, "select", 1, 1000 }, - { test_poll, "poll", 1, 1000 }, - { test_usleep, "usleep", 1, 1000 }, - { test_time, "time", 1, 1000 }, - { test_gettimeofday, "gettimeofday", 1, 1000000 }, - { test_getpid, "getpid", 1, 1000000 }, - { test_bcopy, "bcopy", 1000, 100000000 }, - { test_builtin_memcpy, "__builtin_memcpy", 1000, 100000000 }, - { test_memcpy, "memcpy", 1000, 100000000 }, - { test_fastcopy, "fastcopy", 1000, 100000000 }, - { test_asmcopy, "asmcopy", 1000, 100000000 }, - { test_add, "add", ONE_MILLION, 100000000 }, - { test_nop, "nop", ONE_MILLION, 100000000 }, - { test_atomic_add, 
"atomic-add", ONE_MILLION, 100000000 }, - { test_cli, "cli", ONE_MILLION, 100000000 }, - { test_rdtsc, "rdtsc", ONE_MILLION, 100000000 }, // unserialized - { test_rdtsc1, "rdtsc1", ONE_MILLION, 100000000 }, // serialized - { test_atomic_cmpset, "cmpset", ONE_MILLION, 100000000 }, - { test_netmap, "netmap", 1000, 100000000 }, - { NULL, NULL, 0, 0 } -}; - -static void -usage(void) -{ - const char *cmd = "test"; - int i; - - fprintf(stderr, - "Usage:\n" - "%s arguments\n" - "\t-m name test name\n" - "\t-n cycles (millions) of cycles\n" - "\t-l arg bytes, usec, ... \n" - "\t-t threads total threads\n" - "\t-c cores cores to use\n" - "\t-a n force affinity every n cores\n" - "\t-A n cache contention every n bytes\n" - "\t-w report_ms milliseconds between reports\n" - "", - cmd); - fprintf(stderr, "Available tests:\n"); - for (i = 0; tests[i].name; i++) { - fprintf(stderr, "%12s\n", tests[i].name); - } - - exit(0); -} - -static int64_t -getnum(const char *s) -{ - int64_t n; - char *e; - - n = strtol(s, &e, 0); - switch (e ? *e : '\0') { - case 'k': - case 'K': - return n*1000; - case 'm': - case 'M': - return n*1000*1000; - case 'g': - case 'G': - return n*1000*1000*1000; - case 't': - case 'T': - return n*1000*1000*1000*1000; - default: - return n; - } -} - -struct glob_arg g; -int -main(int argc, char **argv) -{ - int i, ch, report_interval, affinity, align; - - ND("g has size %d", (int)sizeof(g)); - report_interval = 250; /* ms */ - affinity = 0; /* no affinity */ - align = 0; /* global variable */ - - bzero(&g, sizeof(g)); - - g.privs = getprivs(); - g.nthreads = 1; - g.cpus = 1; - g.m_cycles = 0; - g.nullfd = open("/dev/zero", O_RDWR); - D("nullfd is %d", g.nullfd); - - while ( (ch = getopt(argc, argv, "A:a:m:n:w:c:t:vl:")) != -1) { - switch(ch) { - default: - D("bad option %c %s", ch, optarg); - usage(); - break; - case 'A': /* align */ - align = atoi(optarg); - break; - case 'a': /* force affinity */ - affinity = atoi(optarg); - break; - case 'n': /* cycles */ - g.m_cycles = getnum(optarg); - break; - case 'w': /* report interval */ - report_interval = atoi(optarg); - break; - case 'c': - g.cpus = atoi(optarg); - break; - case 't': - g.nthreads = atoi(optarg); - break; - case 'm': - g.test_name = optarg; - break; - case 'l': - g.arg = getnum(optarg); - break; - - case 'v': - verbose++; - break; - } - } - argc -= optind; - argv += optind; - if (!g.test_name && argc > 0) - g.test_name = argv[0]; - - if (g.test_name) { - for (i = 0; tests[i].name; i++) { - if (!strcmp(g.test_name, tests[i].name)) { - g.fn = tests[i].fn; - g.scale = tests[i].scale; - if (g.m_cycles == 0) - g.m_cycles = tests[i].m_cycles; - if (g.scale == ONE_MILLION) - g.scale_name = "M"; - else if (g.scale == 1000) - g.scale_name = "K"; - else { - g.scale = 1; - g.scale_name = ""; - } - break; - } - } - } - if (!g.fn) { - D("%s", "missing/unknown test name"); - usage(); - } - i = system_ncpus(); - if (g.cpus < 0 || g.cpus > i) { - D("%d cpus is too high, have only %d cpus", g.cpus, i); - usage(); - } - if (g.cpus == 0) - g.cpus = i; - if (g.nthreads < 1) { - D("bad nthreads %d, using 1", g.nthreads); - g.nthreads = 1; - } - i = sizeof(g.v.ctr) / g.nthreads*sizeof(g.v.ctr[0]); - if (align < 0 || align > i) { - D("bad align %d, max is %d", align, i); - align = i; - } - - /* Install ^C handler. */ - global_nthreads = g.nthreads; - signal(SIGINT, sigint_h); - - ta = calloc(g.nthreads, sizeof(*ta)); - /* - * Now create the desired number of threads, each one - * using a single descriptor. 
- */ - D("start %d threads on %d cores", g.nthreads, g.cpus); - for (i = 0; i < g.nthreads; i++) { - struct targ *t = &ta[i]; - bzero(t, sizeof(*t)); - t->g = &g; - t->me = i; - t->glob_ctr = &g.v.ctr[(i*align)/sizeof(g.v.ctr[0])]; - D("thread %d ptr %p", i, t->glob_ctr); - t->affinity = affinity ? (affinity*i) % g.cpus : -1; - if (pthread_create(&t->thread, NULL, td_body, t) == -1) { - D("Unable to create thread %d", i); - t->completed = 1; - } - } - /* the main loop */ - - { - uint64_t my_count = 0, prev = 0; - uint64_t count = 0; - double delta_t; - struct timeval tic, toc; - - gettimeofday(&toc, NULL); - for (;;) { - struct timeval now, delta; - uint64_t pps; - int done = 0; - - delta.tv_sec = report_interval/1000; - delta.tv_usec = (report_interval%1000)*1000; - select(0, NULL, NULL, NULL, &delta); - gettimeofday(&now, NULL); - timersub(&now, &toc, &toc); - my_count = 0; - for (i = 0; i < g.nthreads; i++) { - my_count += ta[i].count; - if (ta[i].completed) - done++; - } - pps = toc.tv_sec* ONE_MILLION + toc.tv_usec; - if (pps < 10000) - continue; - pps = (my_count - prev)*ONE_MILLION / pps; - D("%" PRIu64 " %scycles/s scale %" PRIu64 " in %dus", pps/g.scale, - g.scale_name, g.scale, (int)(toc.tv_sec* ONE_MILLION + toc.tv_usec)); - prev = my_count; - toc = now; - if (done == g.nthreads) - break; - } - D("total %" PRIu64 " cycles", prev); - - timerclear(&tic); - timerclear(&toc); - for (i = 0; i < g.nthreads; i++) { - pthread_join(ta[i].thread, NULL); - - if (ta[i].completed == 0) - continue; - - /* - * Collect threads o1utput and extract information about - * how log it took to send all the packets. - */ - count += ta[i].count; - if (!timerisset(&tic) || timercmp(&ta[i].tic, &tic, <)) - tic = ta[i].tic; - if (!timerisset(&toc) || timercmp(&ta[i].toc, &toc, >)) - toc = ta[i].toc; - } - - /* print output. */ - timersub(&toc, &tic, &toc); - delta_t = toc.tv_sec + 1e-6* toc.tv_usec; - D("total %8.6f seconds", delta_t); - } - - return (0); -} -/* end of file */ diff --git a/netmap/examples/testmmap.c b/netmap/examples/testmmap.c deleted file mode 100644 index 2f1edc7..0000000 --- a/netmap/examples/testmmap.c +++ /dev/null @@ -1,1083 +0,0 @@ -#define TEST_NETMAP - -#include -#include /* ULONG_MAX */ -#include -#include -#include -#include -#include -#include -#include -#include /* PROT_* */ -#include /* O_RDWR */ -#include -#include - - -#define MAX_VARS 100 - -char *variables[MAX_VARS]; -int curr_var; - -#define VAR_FAILED ((void*)1) - -char *firstarg(char *buf) -{ - int v; - char *arg = strtok(buf, " \t\n"); - char *ret; - if (!arg) - return NULL; - if (arg[0] != '$' && arg[0] != '?') - return arg; - v = atoi(arg+1); - if (v < 0 || v >= MAX_VARS) - return ""; - ret = variables[v]; - if (ret == NULL) - return "NULL"; - if (ret == VAR_FAILED) { - printf("reading failed var, exit\n"); - exit(1); - } - if (arg[0] == '?') - return ret; - ret = rindex(ret, '=') + 1; - return ret; -} - -char *nextarg() -{ - return firstarg(NULL); -} - -char *restofline() -{ - return strtok(NULL, "\n"); -} - -void resetvar(int v, char *b) -{ - if (variables[v] != VAR_FAILED) - free(variables[v]); - variables[v] = b; -} - -#define outecho(format, args...) \ - do {\ - printf("%u:%lu: " format "\n", getpid(), (unsigned long) pthread_self(), ##args);\ - fflush(stdout);\ - } while (0) - -#define output(format, args...) 
\ - do {\ - resetvar(curr_var, (char*)malloc(1024));\ - snprintf(variables[curr_var], 1024, format, ##args);\ - outecho(format, ##args);\ - } while (0) - -#define output_err(ret, format, args...)\ - do {\ - if (ret < 0) {\ - resetvar(curr_var, VAR_FAILED);\ - outecho(format, ##args);\ - outecho("error: %s", strerror(errno));\ - } else {\ - output(format, ##args);\ - }\ - } while (0) - -struct chan { - FILE *out; - pid_t pid; - pthread_t tid; -}; - -int chan_search_free(struct chan* c[], int max) -{ - int i; - - for (i = 0; i < max && c[i]; i++) - ; - - return i; -} - -void chan_clear_all(struct chan *c[], int max) -{ - int i; - - for (i = 0; i < max; i++) { - if (c[i]) { - fclose(c[i]->out); - free(c[i]); - c[i] = NULL; - } - } -} - -int last_fd = -1; -size_t last_memsize = 0; -void* last_mmap_addr = NULL; -char* last_access_addr = NULL; -struct nmreq curr_nmr; -char nmr_name[64]; - - -void do_open() -{ - last_fd = open("/dev/netmap", O_RDWR); - output_err(last_fd, "open(\"/dev/netmap\", O_RDWR)=%d", last_fd); -} - -void do_close() -{ - int ret, fd; - char *arg = nextarg(); - fd = arg ? atoi(arg) : last_fd; - ret = close(fd); - output_err(ret, "close(%d)=%d", fd, ret); -} - -#ifdef TEST_NETMAP -#include -#include -#include -#include - -void parse_nmr_config(char* w, struct nmreq *nmr) -{ - char *tok; - int i, v; - - nmr->nr_tx_rings = nmr->nr_rx_rings = 0; - nmr->nr_tx_slots = nmr->nr_rx_slots = 0; - if (w == NULL || ! *w) - return; - for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { - v = atoi(tok); - switch (i) { - case 0: - nmr->nr_tx_slots = nmr->nr_rx_slots = v; - break; - case 1: - nmr->nr_rx_slots = v; - break; - case 2: - nmr->nr_tx_rings = nmr->nr_rx_rings = v; - break; - case 3: - nmr->nr_rx_rings = v; - break; - default: - break; - } - } -} - -void do_getinfo() -{ - int ret; - char *arg, *name; - int fd; - - bzero(&curr_nmr, sizeof(curr_nmr)); - curr_nmr.nr_version = NETMAP_API; - - name = nextarg(); - if (name) { - strncpy(curr_nmr.nr_name, name, sizeof(curr_nmr.nr_name)); - } else { - name = "any"; - } - - arg = nextarg(); - if (!arg) { - fd = last_fd; - goto doit; - } - fd = atoi(arg); - - arg = nextarg(); - parse_nmr_config(arg, &curr_nmr); - -doit: - ret = ioctl(fd, NIOCGINFO, &curr_nmr); - last_memsize = curr_nmr.nr_memsize; - output_err(ret, "ioctl(%d, NIOCGINFO) for %s: region %d memsize=%zu", - fd, name, curr_nmr.nr_arg2, last_memsize); -} - - -void do_regif() -{ - int ret; - char *arg, *name; - int fd = last_fd; - - name = nextarg(); - if (!name) { - name = nmr_name; - goto doit; - } - - bzero(&curr_nmr, sizeof(curr_nmr)); - curr_nmr.nr_version = NETMAP_API; - strncpy(curr_nmr.nr_name, name, sizeof(curr_nmr.nr_name)); - - arg = nextarg(); - if (!arg) { - goto doit; - } - fd = atoi(arg); - - arg = nextarg(); - parse_nmr_config(arg, &curr_nmr); - -doit: - ret = ioctl(fd, NIOCREGIF, &curr_nmr); - last_memsize = curr_nmr.nr_memsize; - output_err(ret, "ioctl(%d, NIOCREGIF) for %s: region %d memsize=%zu", - fd, name, curr_nmr.nr_arg2, last_memsize); -} - - -void -do_txsync() -{ - char *arg = nextarg(); - int fd = arg ? atoi(arg) : last_fd; - int ret = ioctl(fd, NIOCTXSYNC, NULL); - output_err(ret, "ioctl(%d, NIOCTXSYNC)=%d", fd, ret); -} - -void -do_rxsync() -{ - char *arg = nextarg(); - int fd = arg ? 
atoi(arg) : last_fd; - int ret = ioctl(fd, NIOCRXSYNC, NULL); - output_err(ret, "ioctl(%d, NIOCRXSYNC)=%d", fd, ret); -} -#endif /* TEST_NETMAP */ - - -volatile char tmp1; -void do_access() -{ - char *arg = nextarg(); - char *p; - if (!arg) { - if (!last_access_addr) { - output("missing address"); - return; - } - p = last_access_addr; - } else { - p = (char *)strtoul((void *)arg, NULL, 0); - } - last_access_addr = p + 4096; - tmp1 = *p; -} - -void do_mmap() -{ - size_t memsize; - off_t off = 0; - int fd; - char *arg; - - arg = nextarg(); - if (!arg) { - memsize = last_memsize; - fd = last_fd; - goto doit; - } - memsize = atoi(arg); - arg = nextarg(); - if (!arg) { - fd = last_fd; - goto doit; - } - fd = atoi(arg); - arg = nextarg(); - if (arg) { - off = (off_t)atol(arg); - } -doit: - last_mmap_addr = mmap(0, memsize, - PROT_WRITE | PROT_READ, - MAP_SHARED, fd, off); - if (last_access_addr == NULL) - last_access_addr = last_mmap_addr; - output_err(last_mmap_addr == MAP_FAILED ? -1 : 0, - "mmap(0, %zu, PROT_WRITE|PROT_READ, MAP_SHARED, %d, %jd)=%p", - memsize, fd, (intmax_t)off, last_mmap_addr); - -} - -void do_munmap() -{ - void *mmap_addr; - size_t memsize; - char *arg; - int ret; - - arg = nextarg(); - if (!arg) { - mmap_addr = last_mmap_addr; - memsize = last_memsize; - goto doit; - } - mmap_addr = (void*)strtoul(arg, NULL, 0); - arg = nextarg(); - if (!arg) { - memsize = last_memsize; - goto doit; - } - memsize = (size_t)strtoul(arg, NULL, 0); -doit: - ret = munmap(mmap_addr, memsize); - output_err(ret, "munmap(%p, %zu)=%d", mmap_addr, memsize, ret); -} - -void do_poll() -{ - /* timeout fd fd... */ - nfds_t nfds = 0, allocated_fds = 10, i; - struct pollfd *fds; - int timeout = 500; /* 1/2 second */ - char *arg; - int ret; - - arg = nextarg(); - if (arg) - timeout = atoi(arg); - fds = malloc(allocated_fds * sizeof(struct pollfd)); - if (fds == NULL) { - output_err(-1, "out of memory"); - return; - } - while ( (arg = nextarg()) ) { - if (nfds >= allocated_fds) { - allocated_fds *= 2; - fds = realloc(fds, allocated_fds * sizeof(struct pollfd)); - if (fds == NULL) { - output_err(-1, "out of memory"); - return; - } - } - fds[nfds].fd = atoi(arg); - fds[nfds].events = POLLIN; - nfds++; - } - ret = poll(fds, nfds, timeout); - for (i = 0; i < nfds; i++) { - output("poll(%d)=%s%s%s%s%s", fds[i].fd, - (fds[i].revents & POLLIN) ? "IN " : "- ", - (fds[i].revents & POLLOUT)? "OUT " : "- ", - (fds[i].revents & POLLERR)? "ERR " : "- ", - (fds[i].revents & POLLHUP)? 
"HUP " : "- ", - (fds[i].revents & POLLNVAL)?"NVAL" : "-"); - - } - output_err(ret, "poll(...)=%d", ret); - free(fds); -} - - -void -do_expr() -{ - unsigned long stack[11]; - int top = 10; - char *arg; - int err = 0; - - stack[10] = ULONG_MAX; - while ( (arg = nextarg()) ) { - errno = 0; - char *rest; - unsigned long n = strtoul(arg, &rest, 0); - if (!errno && rest != arg) { - if (top <= 0) { - err = -1; - break; - } - stack[--top] = n; - continue; - } - if (top <= 8) { - unsigned long n1 = stack[top++]; - unsigned long n2 = stack[top++]; - unsigned long r = 0; - switch (arg[0]) { - case '+': - r = n1 + n2; - break; - case '-': - r = n1 - n2; - break; - case '*': - r = n1 * n2; - break; - case '/': - if (n2) - r = n1 / n2; - else { - errno = EDOM; - err = -1; - } - break; - default: - err = -1; - break; - } - stack[--top] = r; - continue; - } - err = -1; - break; - } - output_err(err, "expr=%lu", stack[top]); -} - - - -void -do_echo() -{ - char *arg; - for (arg = nextarg(); arg; arg = nextarg()) { - printf("%s\n", arg); - } -} - -void -do_vars() -{ - int i; - for (i = 0; i < MAX_VARS; i++) { - const char *v = variables[i]; - if (v == NULL) - continue; - printf("?%d\t%s\n", i, v == VAR_FAILED ? "FAILED" : v); - } -} - - -struct cmd_def { - const char *name; - void (*f)(void); -}; - -int _find_command(const struct cmd_def *cmds, int ncmds, const char* cmd) -{ - int i; - for (i = 0; i < ncmds; i++) { - if (strcmp(cmds[i].name, cmd) == 0) - break; - } - return i; -} - -typedef void (*nmr_arg_interp_fun)(); - -#define nmr_arg_unexpected(n) \ - printf("arg%d: %d%s\n", n, curr_nmr.nr_arg ## n, \ - (curr_nmr.nr_arg ## n ? "???" : "")) - -void -nmr_arg_bdg_attach() -{ - uint16_t v = curr_nmr.nr_arg1; - printf("arg1: %d [", v); - if (v == 0) { - printf("no host rings"); - } else if (v == NETMAP_BDG_HOST) { - printf("BDG_HOST"); - } else { - printf("???"); - } - printf("]\n"); - nmr_arg_unexpected(2); - nmr_arg_unexpected(3); -} - -void -nmr_arg_bdg_detach() -{ - nmr_arg_unexpected(1); - nmr_arg_unexpected(2); - nmr_arg_unexpected(3); -} - -void -nmr_arg_bdg_list() -{ -} - -void -nmr_arg_lookup_reg() -{ -} - -void -nmr_arg_vnet_hdr() -{ - printf("arg1: %d [vnet hdr len]", curr_nmr.nr_arg1); - nmr_arg_unexpected(2); - nmr_arg_unexpected(3); -} - -void -nmr_arg_error() -{ - nmr_arg_unexpected(1); - nmr_arg_unexpected(2); - nmr_arg_unexpected(3); -} - -void -nmr_arg_extra() -{ - printf("arg1: %d [%sextra rings]\n", curr_nmr.nr_arg1, - (curr_nmr.nr_arg1 ? "" : "no ")); - printf("arg2: %d [%s memory allocator]\n", curr_nmr.nr_arg2, - (curr_nmr.nr_arg2 == 0 ? "global" : "private")); - printf("arg3: %d [%sextra buffers]\n", curr_nmr.nr_arg3, - (curr_nmr.nr_arg3 ? 
"" : "no ")); -} - -void -do_nmr_dump() -{ - u_int ringid = curr_nmr.nr_ringid & NETMAP_RING_MASK; - nmr_arg_interp_fun arg_interp; - - snprintf(nmr_name, IFNAMSIZ + 1, "%s", curr_nmr.nr_name); - nmr_name[IFNAMSIZ] = '\0'; - printf("name: %s\n", nmr_name); - printf("version: %d\n", curr_nmr.nr_version); - printf("offset: %d\n", curr_nmr.nr_offset); - printf("memsize: %d [", curr_nmr.nr_memsize); - if (curr_nmr.nr_memsize < (1<<20)) { - printf("%d KiB", curr_nmr.nr_memsize >> 10); - } else { - printf("%d MiB", curr_nmr.nr_memsize >> 20); - } - printf("]\n"); - printf("tx_slots: %d\n", curr_nmr.nr_tx_slots); - printf("rx_slots: %d\n", curr_nmr.nr_rx_slots); - printf("tx_rings: %d\n", curr_nmr.nr_tx_rings); - printf("rx_rings: %d\n", curr_nmr.nr_rx_rings); - printf("ringid: %x [", curr_nmr.nr_ringid); - if (curr_nmr.nr_ringid & NETMAP_SW_RING) { - printf("host rings"); - } else if (curr_nmr.nr_ringid & NETMAP_HW_RING) { - printf("hw ring %d", ringid); - } else { - printf("hw rings"); - } - if (curr_nmr.nr_ringid & NETMAP_NO_TX_POLL) { - printf(", no tx poll"); - } - printf(", region %d", curr_nmr.nr_arg2); - printf("]\n"); - printf("cmd: %d", curr_nmr.nr_cmd); - if (curr_nmr.nr_cmd) { - printf("["); - switch (curr_nmr.nr_cmd) { - case NETMAP_BDG_ATTACH: - printf("BDG_ATTACH"); - arg_interp = nmr_arg_bdg_attach; - break; - case NETMAP_BDG_DETACH: - printf("BDG_DETACH"); - arg_interp = nmr_arg_bdg_detach; - break; - case NETMAP_BDG_LIST: - printf("BDG_LIST"); - arg_interp = nmr_arg_bdg_list; - break; - case NETMAP_BDG_LOOKUP_REG: - printf("BDG_LOOKUP_REG"); - arg_interp = nmr_arg_lookup_reg; - break; - case NETMAP_BDG_VNET_HDR: - printf("BDG_VNET_HDR"); - arg_interp = nmr_arg_vnet_hdr; - break; - default: - printf("???"); - arg_interp = nmr_arg_error; - break; - } - printf("]\n"); - } else { - arg_interp = nmr_arg_extra; - } - printf("\n"); - arg_interp(); - printf("flags: %x [", curr_nmr.nr_flags); - switch (curr_nmr.nr_flags & NR_REG_MASK) { - case NR_REG_DEFAULT: - printf("obey ringid"); - break; - case NR_REG_ALL_NIC: - printf("ALL_NIC"); - break; - case NR_REG_SW: - printf("SW"); - break; - case NR_REG_NIC_SW: - printf("NIC_SW"); - break; - case NR_REG_ONE_NIC: - printf("ONE_NIC(%d)", ringid); - break; - case NR_REG_PIPE_MASTER: - printf("PIPE_MASTER(%d)", ringid); - break; - case NR_REG_PIPE_SLAVE: - printf("PIPE_SLAVE(%d)", ringid); - break; - default: - printf("???"); - break; - } - if (curr_nmr.nr_flags & NR_MONITOR_TX) { - printf(", MONITOR_TX"); - } - if (curr_nmr.nr_flags & NR_MONITOR_RX) { - printf(", MONITOR_RX"); - } - printf("]\n"); - printf("spare2[0]: %x\n", curr_nmr.spare2[0]); -} - -void -do_nmr_reset() -{ - bzero(&curr_nmr, sizeof(curr_nmr)); -} - -void -do_nmr_name() -{ - char *name = nextarg(); - if (name) { - strncpy(curr_nmr.nr_name, name, IFNAMSIZ); - } - strncpy(nmr_name, curr_nmr.nr_name, IFNAMSIZ); - nmr_name[IFNAMSIZ] = '\0'; - output("name=%s", nmr_name); -} - -void -do_nmr_ringid() -{ - char *arg; - uint16_t ringid = curr_nmr.nr_ringid; - int n; - for (n = 0, arg = nextarg(); arg; arg = nextarg(), n++) { - if (strcmp(arg, "hw-ring") == 0) { - ringid |= NETMAP_HW_RING; - } else if (strcmp(arg, "sw-ring") == 0) { - ringid |= NETMAP_SW_RING; - } else if (strcmp(arg, "no-tx-poll") == 0) { - ringid |= NETMAP_NO_TX_POLL; - } else if (strcmp(arg, "default") == 0) { - ringid = 0; - } else { - ringid &= ~NETMAP_RING_MASK; - ringid |= (atoi(arg) & NETMAP_RING_MASK); - } - } - if (n) - curr_nmr.nr_ringid = ringid; - output("ringid=%x", curr_nmr.nr_ringid); -} - -void 
-do_nmr_cmd() -{ -} - -void -do_nmr_flags() -{ - char *arg; - uint32_t flags = curr_nmr.nr_flags; - int n; - for (n = 0, arg = nextarg(); arg; arg = nextarg(), n++) { - if (strcmp(arg, "all-nic") == 0) { - flags &= ~NR_REG_MASK; - flags |= NR_REG_ALL_NIC; - } else if (strcmp(arg, "sw") == 0) { - flags &= ~NR_REG_MASK; - flags |= NR_REG_SW; - } else if (strcmp(arg, "nic-sw") == 0) { - flags &= ~NR_REG_MASK; - flags |= NR_REG_NIC_SW; - } else if (strcmp(arg, "pipe-master") == 0) { - flags &= ~NR_REG_MASK; - flags |= NR_REG_PIPE_MASTER; - } else if (strcmp(arg, "pipe-slave") == 0) { - flags &= ~NR_REG_MASK; - flags |= NR_REG_PIPE_SLAVE; - } else if (strcmp(arg, "monitor-tx") == 0) { - flags |= NR_MONITOR_TX; - } else if (strcmp(arg, "monitor-rx") == 0) { - flags |= NR_MONITOR_RX; - } else if (strcmp(arg, "default") == 0) { - flags = 0; - } - } - if (n) - curr_nmr.nr_flags = flags; - output("flags=%x", curr_nmr.nr_flags); -} - -struct cmd_def nmr_commands[] = { - { "dump", do_nmr_dump }, - { "reset", do_nmr_reset }, - { "name", do_nmr_name }, - { "ringid", do_nmr_ringid }, - { "cmd", do_nmr_cmd }, - { "flags", do_nmr_flags }, -}; - -const int N_NMR_CMDS = sizeof(nmr_commands) / sizeof(struct cmd_def); - -int -find_nmr_command(const char *cmd) -{ - return _find_command(nmr_commands, N_NMR_CMDS, cmd); -} - -#define nmr_arg_update(f) \ - ({ \ - int __ret = 0; \ - if (strcmp(cmd, #f) == 0) { \ - char *arg = nextarg(); \ - if (arg) { \ - curr_nmr.nr_##f = strtol(arg, NULL, 0); \ - } \ - output(#f "=%d", curr_nmr.nr_##f); \ - __ret = 1; \ - } \ - __ret; \ - }) - -/* prepare the curr_nmr */ -void -do_nmr() -{ - char *cmd = nextarg(); - int i; - - if (cmd == NULL) { - do_nmr_dump(); - return; - } - if (cmd[0] == '.') { - cmd++; - } else { - i = find_nmr_command(cmd); - if (i < N_NMR_CMDS) { - nmr_commands[i].f(); - return; - } - } - if (nmr_arg_update(version) || - nmr_arg_update(offset) || - nmr_arg_update(memsize) || - nmr_arg_update(tx_slots) || - nmr_arg_update(rx_slots) || - nmr_arg_update(tx_rings) || - nmr_arg_update(rx_rings) || - nmr_arg_update(ringid) || - nmr_arg_update(cmd) || - nmr_arg_update(arg1) || - nmr_arg_update(arg2) || - nmr_arg_update(arg3) || - nmr_arg_update(flags)) - return; - output("unknown field: %s", cmd); -} - - - -struct cmd_def commands[] = { - { "open", do_open, }, - { "close", do_close, }, -#ifdef TEST_NETMAP - { "getinfo", do_getinfo, }, - { "regif", do_regif, }, - { "txsync", do_txsync, }, - { "rxsync", do_rxsync, }, -#endif /* TEST_NETMAP */ - { "mmap", do_mmap, }, - { "access", do_access, }, - { "munmap", do_munmap, }, - { "poll", do_poll, }, - { "expr", do_expr, }, - { "echo", do_echo, }, - { "vars", do_vars, }, - { "nmr", do_nmr, } -}; - -const int N_CMDS = sizeof(commands) / sizeof(struct cmd_def); - -int find_command(const char* cmd) -{ - return _find_command(commands, N_CMDS, cmd); -} - -#define MAX_CHAN 10 - -void prompt() -{ - if (isatty(STDIN_FILENO)) { - printf("> "); - } -} - -struct chan *channels[MAX_CHAN]; - -void* -thread_cmd_loop(void *arg) -{ - char buf[1024]; - FILE *in = (FILE*)arg; - - while (fgets(buf, 1024, in)) { - char *cmd; - int i; - - cmd = firstarg(buf); - i = find_command(cmd); - if (i < N_CMDS) { - commands[i].f(); - continue; - } - output("unknown cmd %s", cmd); - } - fclose(in); - return NULL; -} - -void do_exit() -{ - output("quit"); -} - -void -cmd_loop() -{ - char buf[1024]; - int i; - struct chan *c; - - bzero(channels, sizeof(*channels) * MAX_CHAN); - - atexit(do_exit); - - for (prompt(); fgets(buf, 1024, stdin); prompt()) { - 
char *cmd; - int slot; - - cmd = firstarg(buf); - if (!cmd) - continue; - if (cmd[0] == '@') { - curr_var = atoi(cmd + 1); - if (curr_var < 0 || curr_var >= MAX_VARS) - curr_var = 0; - cmd = nextarg(); - if (!cmd) - continue; - } else { - curr_var = 0; - } - - if (strcmp(cmd, "fork") == 0) { - int slot = chan_search_free(channels, MAX_CHAN); - struct chan *c = NULL; - pid_t pid; - int p1[2] = { -1, -1}; - - if (slot == MAX_CHAN) { - output("too many channels"); - continue; - } - c = channels[slot] = (struct chan*)malloc(sizeof(struct chan)); - if (c == NULL) { - output_err(-1, "malloc"); - continue; - } - bzero(c, sizeof(*c)); - if (pipe(p1) < 0) { - output_err(-1, "pipe"); - goto clean1; - } - c->out = fdopen(p1[1], "w"); - if (c->out == NULL) { - output_err(-1, "fdopen"); - goto clean1; - } - pid = fork(); - switch (pid) { - case -1: - output_err(-1, "fork"); - goto clean1; - case 0: - fclose(stdin); - if (dup(p1[0]) < 0) { - output_err(-1, "dup"); - exit(1); - } - close(p1[1]); - stdin = fdopen(0, "r"); - chan_clear_all(channels, MAX_CHAN); - goto out; - default: - break; - } - c->pid = pid; - close(p1[0]); - output("fork()=%d slot=%d", pid, slot); - continue; - clean1: - if (c) { - fclose(c->out); - } - close(p1[0]); - close(p1[1]); - free(c); - out: - continue; - } - if (strcmp(cmd, "kill") == 0) { - int ret; - - cmd = nextarg(); - if (!cmd) { - output("missing slot"); - continue; - } - slot = atoi(cmd); - if (slot < 0 || slot >= MAX_CHAN || !channels[slot]) { - output("invalid slot: %s", cmd); - continue; - } - c = channels[slot]; - ret = kill(c->pid, SIGTERM); - output_err(ret, "kill(%d, SIGTERM)=%d", c->pid, ret); - if (ret != -1) { - wait(NULL); - fclose(c->out); - free(c); - channels[slot] = NULL; - } - continue; - } - if (strcmp(cmd, "thread") == 0) { - int slot = chan_search_free(channels, MAX_CHAN); - struct chan *c = NULL; - pthread_t tid; - int p1[2] = { -1, -1}; - int ret; - FILE *in = NULL; - - if (slot == MAX_CHAN) { - output("too many channels"); - continue; - } - c = channels[slot] = (struct chan*)malloc(sizeof(struct chan)); - bzero(c, sizeof(*c)); - if (pipe(p1) < 0) { - output_err(-1, "pipe"); - goto clean2; - } - c->out = fdopen(p1[1], "w"); - if (c->out == NULL) { - output_err(-1, "fdopen"); - goto clean2; - } - in = fdopen(p1[0], "r"); - if (in == NULL) { - output_err(-1, "fdopen"); - goto clean2; - } - ret = pthread_create(&tid, NULL, thread_cmd_loop, in); - output_err(ret, "pthread_create() tid=%lu slot=%d", - (unsigned long) tid, slot); - if (ret < 0) - goto clean2; - c->pid = getpid(); - c->tid = tid; - continue; - clean2: - fclose(in); - fclose(c->out); - close(p1[0]); - close(p1[1]); - free(c); - continue; - } - if (strcmp(cmd, "cancel") == 0) { - int ret; - - cmd = nextarg(); - if (!cmd) { - output("missing slot"); - continue; - } - slot = atoi(cmd); - if (slot < 0 || slot >= MAX_CHAN || !channels[slot]) { - output("invalid slot: %s", cmd); - continue; - } - c = channels[slot]; - fclose(c->out); - ret = pthread_join(c->tid, NULL); - output_err(ret, "pthread_join(%lu)=%d", - (unsigned long) c->tid, ret); - if (ret > 0) { - free(c); - channels[slot] = NULL; - } - continue; - } - i = find_command(cmd); - if (i < N_CMDS) { - commands[i].f(); - continue; - } - slot = atoi(cmd); - if (slot < 0 || slot > MAX_CHAN || !channels[slot]) { - output("invalid cmd/slot: %s", cmd); - continue; - } - cmd = restofline(); - if (!cmd) { - output("missing command"); - continue; - } - fprintf(channels[slot]->out, "%s\n", cmd); - fflush(channels[slot]->out); - sleep(1); - } -} - 
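A minimal sketch of an interactive session with the interpreter above (output omitted; "em0" is only an example interface name, and each command comes from the commands[] table defined earlier in this file). A command may be prefixed with @N to store its result in variable N:

	open
	getinfo
	regif em0
	mmap
	nmr dump
	munmap
	close
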
-int -main(int argc, char **argv) -{ - (void) argc; - (void) argv; - cmd_loop(); - return 0; -} diff --git a/netmap/examples/vale-ctl.c b/netmap/examples/vale-ctl.c deleted file mode 100644 index e1d8da5..0000000 --- a/netmap/examples/vale-ctl.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (C) 2013-2014 Michio Honda. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* $FreeBSD$ */ - -#include -#include -#include /* PRI* macros */ -#include /* strcmp */ -#include /* open */ -#include /* close */ -#include /* ioctl */ -#include -#include /* apple needs sockaddr */ -#include /* ifreq */ -#include -#include -#include /* basename */ - -/* debug support */ -#define ND(format, ...) do {} while(0) -#define D(format, ...) 
\ - fprintf(stderr, "%s [%d] " format "\n", \ - __FUNCTION__, __LINE__, ##__VA_ARGS__) - -static int -bdg_ctl(const char *name, int nr_cmd, int nr_arg) -{ - struct nmreq nmr; - int error = 0; - int fd = open("/dev/netmap", O_RDWR); - - if (fd == -1) { - D("Unable to open /dev/netmap"); - return -1; - } - - bzero(&nmr, sizeof(nmr)); - nmr.nr_version = NETMAP_API; - if (name != NULL) /* might be NULL */ - strncpy(nmr.nr_name, name, sizeof(nmr.nr_name)); - nmr.nr_cmd = nr_cmd; - - switch (nr_cmd) { - case NETMAP_BDG_ATTACH: - case NETMAP_BDG_DETACH: - if (nr_arg && nr_arg != NETMAP_BDG_HOST) - nr_arg = 0; - nmr.nr_arg1 = nr_arg; - error = ioctl(fd, NIOCREGIF, &nmr); - if (error == -1) { - ND("Unable to %s %s to the bridge", nr_cmd == - NETMAP_BDG_DETACH?"detach":"attach", name); - perror(name); - } else - ND("Success to %s %s to the bridge", nr_cmd == - NETMAP_BDG_DETACH?"detach":"attach", name); - break; - - case NETMAP_BDG_LIST: - if (strlen(nmr.nr_name)) { /* name to bridge/port info */ - error = ioctl(fd, NIOCGINFO, &nmr); - if (error) { - ND("Unable to obtain info for %s", name); - perror(name); - } else - D("%s at bridge:%d port:%d", name, nmr.nr_arg1, - nmr.nr_arg2); - break; - } - - /* scan all the bridges and ports */ - nmr.nr_arg1 = nmr.nr_arg2 = 0; - for (; !ioctl(fd, NIOCGINFO, &nmr); nmr.nr_arg2++) { - D("bridge:%d port:%d %s", nmr.nr_arg1, nmr.nr_arg2, - nmr.nr_name); - nmr.nr_name[0] = '\0'; - } - - break; - - default: /* GINFO */ - nmr.nr_cmd = nmr.nr_arg1 = nmr.nr_arg2 = 0; - error = ioctl(fd, NIOCGINFO, &nmr); - if (error) { - ND("Unable to get if info for %s", name); - perror(name); - } else - D("%s: %d queues.", name, nmr.nr_rx_rings); - break; - } - close(fd); - return error; -} - -int -main(int argc, char *argv[]) -{ - int ch, nr_cmd = 0, nr_arg = 0; - const char *command = basename(argv[0]); - char *name = NULL; - - if (argc > 3) { -usage: - fprintf(stderr, - "Usage:\n" - "%s arguments\n" - "\t-g interface interface name to get info\n" - "\t-d interface interface name to be detached\n" - "\t-a interface interface name to be attached\n" - "\t-h interface interface name to be attached with the host stack\n" - "\t-l list all or specified bridge's interfaces (default)\n" - "", command); - return 0; - } - - while ((ch = getopt(argc, argv, "d:a:h:g:l")) != -1) { - name = optarg; /* default */ - switch (ch) { - default: - fprintf(stderr, "bad option %c %s", ch, optarg); - goto usage; - case 'd': - nr_cmd = NETMAP_BDG_DETACH; - break; - case 'a': - nr_cmd = NETMAP_BDG_ATTACH; - break; - case 'h': - nr_cmd = NETMAP_BDG_ATTACH; - nr_arg = NETMAP_BDG_HOST; - break; - case 'g': - nr_cmd = 0; - break; - case 'l': - nr_cmd = NETMAP_BDG_LIST; - if (optind < argc && argv[optind][0] == '-') - name = NULL; - break; - } - if (optind != argc) { - // fprintf(stderr, "optind %d argc %d\n", optind, argc); - goto usage; - } - } - if (argc == 1) - nr_cmd = NETMAP_BDG_LIST; - return bdg_ctl(name, nr_cmd, nr_arg) ? 1 : 0; -} diff --git a/netmap/extra/bro-netmap.diff b/netmap/extra/bro-netmap.diff deleted file mode 100644 index 207e641..0000000 --- a/netmap/extra/bro-netmap.diff +++ /dev/null @@ -1,95 +0,0 @@ -diff --git a/src/PktSrc.cc b/src/PktSrc.cc -index 9d6bce6..e8f59dd 100644 ---- a/src/PktSrc.cc -+++ b/src/PktSrc.cc -@@ -11,6 +11,26 @@ - #include "Net.h" - #include "Sessions.h" - -+#define HAVE_NETMAP -+ -+#ifdef HAVE_NETMAP -+ -+// Compile in netmap support. 
If the interface name starts with -+// "netmap:" or "vale" we use a netmap fd instead of pcap, and bind -+// one or all rings depending on NETMAP_RING_ID environment variable. -+// -+// For a test run you can use the vale switch, -+// pkt-gen -i vale1:b -f tx -R ..rate_in_pps -+// and launch bro like this -+/* -+ -+BROPATH=`./bro-path-dev` ./src/bro -i vale1:a -b -e 'global l=0; event p(){local s=net_stats(); local c=s$pkts_recvd;print c-l;l=c; schedule 1 sec {p()};} event bro_init(){event p();}' -+ -+ */ -+#define NETMAP_WITH_LIBS -+#include -+ -+#endif /* HAVE_NETMAP */ - - // ### This needs auto-confing. - #ifdef HAVE_PCAP_INT_H -@@ -75,7 +95,14 @@ int PktSrc::ExtractNextPacket() - return 0; - } - -+#ifdef HAVE_NETMAP -+ // in netmap mode call netmap equivalent of pcap_next() -+ if (IS_NETMAP_DESC(pd)) -+ data = last_data = nm_nextpkt((struct nm_desc *)pd, -+ (struct nm_pkthdr *)&hdr); -+ else -+#endif /* HAVE_NETMAP */ - data = last_data = pcap_next(pd, &hdr); - - if ( data && (hdr.len == 0 || hdr.caplen == 0) ) - { -@@ -407,6 +435,11 @@ void PktSrc::Close() - { - if ( pd ) - { -+#ifdef HAVE_NETMAP -+ if (IS_NETMAP_DESC(pd)) -+ nm_close((struct nm_desc *)pd); -+ else -+#endif /* HAVE_NETMAP */ - pcap_close(pd); - pd = 0; - closed = true; -@@ -443,6 +476,14 @@ void PktSrc::Statistics(Stats* s) - else - { - struct pcap_stat pstat; -+#ifdef HAVE_NETMAP -+ if (IS_NETMAP_DESC(pd)) -+ { -+ s->dropped = stats.dropped; -+ s->link = stats.received; -+ } -+ else -+#endif /* HAVE_NETMAP */ - if ( pcap_stats(pd, &pstat) < 0 ) - { - reporter->Error("problem getting packet filter statistics: %s", -@@ -482,6 +523,21 @@ PktInterfaceSrc::PktInterfaceSrc(const char* arg_interface, const char* filter, - - interface = copy_string(arg_interface); - -+#ifdef HAVE_NETMAP -+ pd = (pcap_t *)nm_open(interface, getenv("NETMAP_RING_ID"), 0, 0); -+ // netmap interfaces are named netmap:* or vale* -+ // If pd == 0 && errno == 0 "interface" is not a valid -+ // netmap interface name, so we fall through to pcap -+ if (pd || errno > 0) -+ { -+ if (pd) -+ selectable_fd = NETMAP_FD(pd); -+ else -+ closed = true; -+ return; -+ } -+#endif /* HAVE_NETMAP */ -+ - // Determine network and netmask. 
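The hunks above make Bro use the nm_open()/nm_nextpkt()/nm_close() helpers from net/netmap_user.h (enabled by NETMAP_WITH_LIBS) whenever the interface name starts with "netmap:" or "vale". For reference, a minimal stand-alone sketch of that capture pattern; the port name "netmap:eth0" and the packet handling are illustrative only:

#include <stdio.h>
#include <poll.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

int
main(void)
{
	/* same helper call pattern used by the patch above */
	struct nm_desc *d = nm_open("netmap:eth0", NULL, 0, NULL);
	struct nm_pkthdr h;
	struct pollfd pfd;
	int got = 0;

	if (d == NULL)
		return 1;
	pfd.fd = NETMAP_FD(d);	/* selectable descriptor, as in PktInterfaceSrc */
	pfd.events = POLLIN;
	while (got < 10) {	/* grab a few packets, then exit */
		poll(&pfd, 1, 1000);
		while (nm_nextpkt(d, &h) != NULL) {	/* pcap_next() equivalent */
			printf("got %u bytes\n", (unsigned)h.len);
			got++;
		}
	}
	nm_close(d);
	return 0;
}
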
- uint32 net; - if ( pcap_lookupnet(interface, &net, &netmask, tmp_errbuf) < 0 ) diff --git a/netmap/extra/libpcap-netmap.diff b/netmap/extra/libpcap-netmap.diff deleted file mode 100644 index cd6c389..0000000 --- a/netmap/extra/libpcap-netmap.diff +++ /dev/null @@ -1,389 +0,0 @@ -diff --git a/Makefile.in b/Makefile.in -index 9995458..c670d66 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -83,7 +83,7 @@ YACC = @V_YACC@ - @rm -f $@ - $(CC) $(FULL_CFLAGS) -c $(srcdir)/$*.c - --PSRC = pcap-@V_PCAP@.c @USB_SRC@ @BT_SRC@ @CAN_SRC@ @NETFILTER_SRC@ @CANUSB_SRC@ @DBUS_SRC@ -+PSRC = pcap-@V_PCAP@.c @USB_SRC@ @BT_SRC@ @CAN_SRC@ @NETFILTER_SRC@ @CANUSB_SRC@ @DBUS_SRC@ @NETMAP_SRC@ - FSRC = fad-@V_FINDALLDEVS@.c - SSRC = @SSRC@ - CSRC = pcap.c inet.c gencode.c optimize.c nametoaddr.c etherent.c \ -@@ -313,6 +313,7 @@ EXTRA_DIST = \ - pcap-namedb.h \ - pcap-netfilter-linux.c \ - pcap-netfilter-linux.h \ -+ pcap-netmap.c \ - pcap-nit.c \ - pcap-null.c \ - pcap-pf.c \ -diff --git a/config.h.in b/config.h.in -index c6bc68e..09c8557 100644 ---- a/config.h.in -+++ b/config.h.in -@@ -268,6 +268,9 @@ - /* target host supports netfilter sniffing */ - #undef PCAP_SUPPORT_NETFILTER - -+/* target host supports netmap */ -+#undef PCAP_SUPPORT_NETMAP -+ - /* target host supports USB sniffing */ - #undef PCAP_SUPPORT_USB - -diff --git a/configure b/configure -index be87668..a8d0cae 100755 ---- a/configure -+++ b/configure -@@ -626,6 +626,8 @@ INSTALL_PROGRAM - DBUS_SRC - PCAP_SUPPORT_DBUS - PKGCONFIG -+NETMAP_SRC -+PCAP_SUPPORT_NETMAP - CAN_SRC - PCAP_SUPPORT_CAN - CANUSB_SRC -@@ -747,6 +749,7 @@ enable_shared - enable_bluetooth - enable_canusb - enable_can -+enable_netmap - enable_dbus - ' - ac_precious_vars='build_alias -@@ -1385,6 +1388,8 @@ Optional Features: - available] - --enable-can enable CAN support [default=yes, if support - available] -+ --enable-netmap enable netmap support [default=yes, if support -+ available] - --enable-dbus enable D-Bus capture support [default=yes, if - support available] - -@@ -8148,6 +8153,39 @@ $as_echo "$as_me: no CAN sniffing support implemented for $host_os" >&6;} - - fi - -+# Check whether --enable-netmap was given. -+if test "${enable_netmap+set}" = set; then : -+ enableval=$enable_netmap; -+else -+ enable_netmap=yes -+fi -+ -+ -+if test "x$enable_netmap" != "xno" ; then -+ case "$host_os" in -+ *) -+ ac_fn_c_check_header_compile "$LINENO" "net/netmap_user.h" "ac_cv_header_net_netmap_user_h" "#include -+ -+" -+if test "x$ac_cv_header_net_netmap_user_h" = xyes; then : -+ -+$as_echo "#define PCAP_SUPPORT_NETMAP 1" >>confdefs.h -+ -+ NETMAP_SRC=pcap-netmap.c -+ { $as_echo "$as_me:${as_lineno-$LINENO}: netmap is supported" >&5 -+$as_echo "$as_me: netmap is supported" >&6;} -+else -+ { $as_echo "$as_me:${as_lineno-$LINENO}: netmap is not supported" >&5 -+$as_echo "$as_me: netmap is not supported" >&6;} -+fi -+ -+ -+ ;; -+ esac -+ -+ -+fi -+ - # Check whether --enable-dbus was given. 
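With the configure hunks above applied, netmap support defaults to enabled and pcap-netmap.c is compiled in whenever the net/netmap_user.h header is found. A plausible invocation, where the include path is only an example pointing at a netmap source tree that provides the header:

	./configure --enable-netmap CPPFLAGS="-I/path/to/netmap/sys"
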
- if test "${enable_dbus+set}" = set; then : - enableval=$enable_dbus; -diff --git a/configure.in b/configure.in -index f0aa2c5..55464ba 100644 ---- a/configure.in -+++ b/configure.in -@@ -1550,6 +1550,28 @@ if test "x$enable_can" != "xno" ; then - AC_SUBST(CAN_SRC) - fi - -+AC_ARG_ENABLE([netmap], -+[AC_HELP_STRING([--enable-netmap],[enable netmap support @<:@default=yes, if support available@:>@])], -+ [], -+ [enable_netmap=yes]) -+ -+if test "x$enable_netmap" != "xno" ; then -+ dnl check for netmap support -+ case "$host_os" in -+ *) -+ AC_CHECK_HEADER(net/netmap_user.h, -+ [ AC_DEFINE(PCAP_SUPPORT_NETMAP, 1, [target host supports netmap]) -+ NETMAP_SRC=pcap-netmap.c -+ AC_MSG_NOTICE(netmap is supported)], -+ AC_MSG_NOTICE(netmap is not supported), -+ [#include ] -+ ) -+ ;; -+ esac -+ AC_SUBST(PCAP_SUPPORT_NETMAP) -+ AC_SUBST(NETMAP_SRC) -+fi -+ - AC_ARG_ENABLE([dbus], - [AC_HELP_STRING([--enable-dbus],[enable D-Bus capture support @<:@default=yes, if support available@:>@])], - [], -diff --git a/inet.c b/inet.c -index c699658..d132507 100644 ---- a/inet.c -+++ b/inet.c -@@ -883,6 +883,10 @@ pcap_lookupnet(device, netp, maskp, errbuf) - #ifdef PCAP_SUPPORT_USB - || strstr(device, "usbmon") != NULL - #endif -+#ifdef PCAP_SUPPORT_NETMAP -+ || !strncmp(device, "netmap:", 7) -+ || !strncmp(device, "vale", 4) -+#endif - #ifdef HAVE_SNF_API - || strstr(device, "snf") != NULL - #endif -diff --git a/pcap-netmap.c b/pcap-netmap.c -new file mode 100644 -index 0000000..2568c2f ---- /dev/null -+++ b/pcap-netmap.c -@@ -0,0 +1,205 @@ -+/* -+ * Copyright 2014 Universita` di Pisa -+ * -+ * packet filter subroutines for netmap -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define NETMAP_WITH_LIBS -+#include -+ -+#include "pcap-int.h" -+ -+#if defined (linux) -+/* On FreeBSD we use IFF_PPROMISC which is in ifr_flagshigh. 
-+ * remap to IFF_PROMISC on linux -+ */ -+#define IFF_PPROMISC IFF_PROMISC -+#define ifr_flagshigh ifr_flags -+#endif /* linux */ -+ -+struct pcap_netmap { -+ struct nm_desc *d; /* pointer returned by nm_open() */ -+ pcap_handler cb; /* callback and argument */ -+ u_char *cb_arg; -+ int must_clear_promisc; /* flag */ -+ uint64_t rx_pkts; /* count of packets received before the filter */ -+}; -+ -+static int -+pcap_netmap_stats(pcap_t *p, struct pcap_stat *ps) -+{ -+ struct pcap_netmap *pn = p->priv; -+ -+ ps->ps_recv = pn->rx_pkts; -+ ps->ps_drop = 0; -+ ps->ps_ifdrop = 0; -+ return 0; -+} -+ -+static void -+pcap_netmap_filter(u_char *arg, struct pcap_pkthdr *h, const u_char *buf) -+{ -+ pcap_t *p = (pcap_t *)arg; -+ struct pcap_netmap *pn = p->priv; -+ -+ ++pn->rx_pkts; -+ if (bpf_filter(p->fcode.bf_insns, buf, h->len, h->caplen)) -+ pn->cb(pn->cb_arg, h, buf); -+} -+ -+static int -+pcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user) -+{ -+ int ret; -+ struct pcap_netmap *pn = p->priv; -+ struct nm_desc *d = pn->d; -+ struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 }; -+ -+ pn->cb = cb; -+ pn->cb_arg = user; -+ -+ for (;;) { -+ if (p->break_loop) { -+ p->break_loop = 0; -+ return PCAP_ERROR_BREAK; -+ } -+ /* nm_dispatch won't run forever */ -+ ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter, (void *)p); -+ if (ret != 0) -+ break; -+ poll(&pfd, 1, p->opt.timeout); -+ } -+ return ret; -+} -+ -+/* XXX need to check the NIOCTXSYNC/poll */ -+static int -+pcap_netmap_inject(pcap_t *p, const void *buf, size_t size) -+{ -+ struct nm_desc *d = ((struct pcap_netmap *)p->priv)->d; -+ -+ return nm_inject(d, buf, size); -+} -+ -+static int -+pcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags) -+{ -+ struct pcap_netmap *pn = p->priv; -+ struct nm_desc *d = pn->d; -+ struct ifreq ifr; -+ int error, fd = d->fd; -+ -+#ifdef linux -+ fd = socket(AF_INET, SOCK_DGRAM, 0); -+ if (fd < 0) { -+ fprintf(stderr, "Error: cannot get device control socket.\n"); -+ return -1; -+ } -+#endif /* linux */ -+ bzero(&ifr, sizeof(ifr)); -+ strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name)); -+ switch (what) { -+ case SIOCSIFFLAGS: -+ ifr.ifr_flags = *if_flags; -+ ifr.ifr_flagshigh = *if_flags >> 16; -+ break; -+ } -+ error = ioctl(fd, what, &ifr); -+ fprintf(stderr, "%s %s ioctl 0x%lx returns %d\n", __FUNCTION__, -+ d->req.nr_name, what, error); -+ if (error) -+ return -1; -+ switch (what) { -+ case SIOCGIFFLAGS: -+ *if_flags = ifr.ifr_flags | (ifr.ifr_flagshigh << 16); -+ } -+ return 0; -+} -+ -+static void -+pcap_netmap_close(pcap_t *p) -+{ -+ struct pcap_netmap *pn = p->priv; -+ struct nm_desc *d = pn->d; -+ uint32_t if_flags = 0; -+ -+ if (pn->must_clear_promisc) { -+ pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */ -+ if (if_flags & IFF_PPROMISC) { -+ if_flags &= ~IFF_PPROMISC; -+ pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags); -+ } -+ } -+ nm_close(d); -+} -+ -+static int -+pcap_netmap_activate(pcap_t *p) -+{ -+ struct pcap_netmap *pn = p->priv; -+ struct nm_desc *d = nm_open(p->opt.source, NULL, 0, NULL); -+ uint32_t if_flags = 0; -+ -+ if (d == NULL) { -+ snprintf(p->errbuf, PCAP_ERRBUF_SIZE, -+ "netmap open: cannot access %s: %s\n", -+ p->opt.source, pcap_strerror(errno)); -+ goto bad; -+ } -+ fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n", -+ __FUNCTION__, p->opt.source, d, d->fd, d->first_rx_ring, d->last_rx_ring); -+ pn->d = d; -+ p->fd = d->fd; -+ if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) { -+ 
pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */ -+ if (!(if_flags & IFF_PPROMISC)) { -+ pn->must_clear_promisc = 1; -+ if_flags |= IFF_PPROMISC; -+ pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags); -+ } -+ } -+ p->linktype = DLT_EN10MB; -+ p->selectable_fd = p->fd; -+ p->read_op = pcap_netmap_dispatch; -+ p->inject_op = pcap_netmap_inject, -+ p->setfilter_op = install_bpf_program; -+ p->setdirection_op = NULL; -+ p->set_datalink_op = NULL; -+ p->getnonblock_op = pcap_getnonblock_fd; -+ p->setnonblock_op = pcap_setnonblock_fd; -+ p->stats_op = pcap_netmap_stats; -+ p->cleanup_op = pcap_netmap_close; -+ return (0); -+ -+ bad: -+ pcap_cleanup_live_common(p); -+ return (PCAP_ERROR); -+} -+ -+pcap_t * -+pcap_netmap_create(const char *device, char *ebuf, int *is_ours) -+{ -+ pcap_t *p; -+ -+ *is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4)); -+ if (! *is_ours) -+ return NULL; -+ p = pcap_create_common(device, ebuf, sizeof (struct pcap_netmap)); -+ if (p == NULL) -+ return (NULL); -+ p->activate_op = pcap_netmap_activate; -+ return (p); -+} -diff --git a/pcap.c b/pcap.c -index b2b5da6..beda714 100644 ---- a/pcap.c -+++ b/pcap.c -@@ -104,6 +104,10 @@ - #include "pcap-dbus.h" - #endif - -+#ifdef PCAP_SUPPORT_NETMAP -+pcap_t* pcap_netmap_create(const char *device, char *ebuf, int *is_ours); -+#endif -+ - int - pcap_not_initialized(pcap_t *pcap _U_) - { -@@ -307,6 +311,9 @@ struct capture_source_type { - int (*findalldevs_op)(pcap_if_t **, char *); - pcap_t *(*create_op)(const char *, char *, int *); - } capture_source_types[] = { -+#ifdef PCAP_SUPPORT_NETMAP -+ { NULL, pcap_netmap_create }, -+#endif - #ifdef HAVE_DAG_API - { dag_findalldevs, dag_create }, - #endif diff --git a/netmap/share/man/man4/netmap.4 b/netmap/share/man/man4/netmap.4 deleted file mode 100644 index cd513a2..0000000 --- a/netmap/share/man/man4/netmap.4 +++ /dev/null @@ -1,1075 +0,0 @@ -.\" Copyright (c) 2011-2014 Matteo Landi, Luigi Rizzo, Universita` di Pisa -.\" All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" This document is derived in part from the enet man page (enet.4) -.\" distributed with 4.3BSD Unix. 
-.\" -.\" $FreeBSD: head/share/man/man4/netmap.4 228017 2011-11-27 06:55:57Z gjb $ -.\" -.Dd February 13, 2014 -.Dt NETMAP 4 -.Os -.Sh NAME -.Nm netmap -.Nd a framework for fast packet I/O -.br -.Nm VALE -.Nd a fast VirtuAl Local Ethernet using the netmap API -.br -.Nm netmap pipes -.Nd a shared memory packet transport channel -.Sh SYNOPSIS -.Cd device netmap -.Sh DESCRIPTION -.Nm -is a framework for extremely fast and efficient packet I/O -for both userspace and kernel clients. -It runs on FreeBSD and Linux, -and includes -.Nm VALE , -a very fast and modular in-kernel software switch/dataplane, -and -.Nm netmap pipes , -a shared memory packet transport channel. -All these are accessed interchangeably with the same API. -.Pp -.Nm , VALE -and -.Nm netmap pipes -are at least one order of magnitude faster than -standard OS mechanisms -(sockets, bpf, tun/tap interfaces, native switches, pipes), -reaching 14.88 million packets per second (Mpps) -with much less than one core on a 10 Gbit NIC, -about 20 Mpps per core for VALE ports, -and over 100 Mpps for netmap pipes. -.Pp -Userspace clients can dynamically switch NICs into -.Nm -mode and send and receive raw packets through -memory mapped buffers. -Similarly, -.Nm VALE -switch instances and ports, and -.Nm netmap pipes -can be created dynamically, -providing high speed packet I/O between processes, -virtual machines, NICs and the host stack. -.Pp -.Nm -suports both non-blocking I/O through -.Xr ioctls() , -synchronization and blocking I/O through a file descriptor -and standard OS mechanisms such as -.Xr select 2 , -.Xr poll 2 , -.Xr epoll 2 , -.Xr kqueue 2 . -.Nm VALE -and -.Nm netmap pipes -are implemented by a single kernel module, which also emulates the -.Nm -API over standard drivers for devices without native -.Nm -support. -For best performance, -.Nm -requires explicit support in device drivers. -.Pp -In the rest of this (long) manual page we document -various aspects of the -.Nm -and -.Nm VALE -architecture, features and usage. -.Pp -.Sh ARCHITECTURE -.Nm -supports raw packet I/O through a -.Em port , -which can be connected to a physical interface -.Em ( NIC ) , -to the host stack, -or to a -.Nm VALE -switch). -Ports use preallocated circular queues of buffers -.Em ( rings ) -residing in an mmapped region. -There is one ring for each transmit/receive queue of a -NIC or virtual port. -An additional ring pair connects to the host stack. -.Pp -After binding a file descriptor to a port, a -.Nm -client can send or receive packets in batches through -the rings, and possibly implement zero-copy forwarding -between ports. -.Pp -All NICs operating in -.Nm -mode use the same memory region, -accessible to all processes who own -.Nm /dev/netmap -file descriptors bound to NICs. -Independent -.Nm VALE -and -.Nm netmap pipe -ports -by default use separate memory regions, -but can be independently configured to share memory. -.Pp -.Sh ENTERING AND EXITING NETMAP MODE -The following section describes the system calls to create -and control -.Nm netmap -ports (including -.Nm VALE -and -.Nm netmap pipe -ports). -Simpler, higher level functions are described in section -.Xr LIBRARIES . -.Pp -Ports and rings are created and controlled through a file descriptor, -created by opening a special device -.Dl fd = open("/dev/netmap"); -and then bound to a specific port with an -.Dl ioctl(fd, NIOCREGIF, (struct nmreq *)arg); -.Pp -.Nm -has multiple modes of operation controlled by the -.Vt struct nmreq -argument. 
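Putting the calls in this section together, a minimal bind sequence might look like the sketch below; error handling is omitted, and the helper name netmap_bind and the port "em0" in the trailing comment are examples only, not part of the API:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

/* illustrative helper: open /dev/netmap, bind it to "port", mmap the region */
static struct netmap_if *
netmap_bind(const char *port, int *pfd)
{
	struct nmreq req;
	void *mem;
	int fd = open("/dev/netmap", O_RDWR);

	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	strncpy(req.nr_name, port, sizeof(req.nr_name));
	ioctl(fd, NIOCREGIF, &req);	/* switch the port to netmap mode */
	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);
	*pfd = fd;
	return NETMAP_IF(mem, req.nr_offset);	/* e.g. netmap_bind("em0", &fd) */
}
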
-.Va arg.nr_name -specifies the port name, as follows: -.Bl -tag -width XXXX -.It Dv OS network interface name (e.g. 'em0', 'eth1', ... ) -the data path of the NIC is disconnected from the host stack, -and the file descriptor is bound to the NIC (one or all queues), -or to the host stack; -.It Dv valeXXX:YYY (arbitrary XXX and YYY) -the file descriptor is bound to port YYY of a VALE switch called XXX, -both dynamically created if necessary. -The string cannot exceed IFNAMSIZ characters, and YYY cannot -be the name of any existing OS network interface. -.El -.Pp -On return, -.Va arg -indicates the size of the shared memory region, -and the number, size and location of all the -.Nm -data structures, which can be accessed by mmapping the memory -.Dl char *mem = mmap(0, arg.nr_memsize, fd); -.Pp -Non blocking I/O is done with special -.Xr ioctl 2 -.Xr select 2 -and -.Xr poll 2 -on the file descriptor permit blocking I/O. -.Xr epoll 2 -and -.Xr kqueue 2 -are not supported on -.Nm -file descriptors. -.Pp -While a NIC is in -.Nm -mode, the OS will still believe the interface is up and running. -OS-generated packets for that NIC end up into a -.Nm -ring, and another ring is used to send packets into the OS network stack. -A -.Xr close 2 -on the file descriptor removes the binding, -and returns the NIC to normal mode (reconnecting the data path -to the host stack), or destroys the virtual port. -.Pp -.Sh DATA STRUCTURES -The data structures in the mmapped memory region are detailed in -.Xr sys/net/netmap.h , -which is the ultimate reference for the -.Nm -API. The main structures and fields are indicated below: -.Bl -tag -width XXX -.It Dv struct netmap_if (one per interface) -.Bd -literal -struct netmap_if { - ... - const uint32_t ni_flags; /* properties */ - ... - const uint32_t ni_tx_rings; /* NIC tx rings */ - const uint32_t ni_rx_rings; /* NIC rx rings */ - uint32_t ni_bufs_head; /* head of extra bufs list */ - ... -}; -.Ed -.Pp -Indicates the number of available rings -.Pa ( struct netmap_rings ) -and their position in the mmapped region. -The number of tx and rx rings -.Pa ( ni_tx_rings , ni_rx_rings ) -normally depends on the hardware. -NICs also have an extra tx/rx ring pair connected to the host stack. -.Em NIOCREGIF -can also request additional unbound buffers in the same memory space, -to be used as temporary storage for packets. -.Pa ni_bufs_head -contains the index of the first of these free rings, -which are connected in a list (the first uint32_t of each -buffer being the index of the next buffer in the list). -A 0 indicates the end of the list. -.Pp -.It Dv struct netmap_ring (one per ring) -.Bd -literal -struct netmap_ring { - ... - const uint32_t num_slots; /* slots in each ring */ - const uint32_t nr_buf_size; /* size of each buffer */ - ... - uint32_t head; /* (u) first buf owned by user */ - uint32_t cur; /* (u) wakeup position */ - const uint32_t tail; /* (k) first buf owned by kernel */ - ... - uint32_t flags; - struct timeval ts; /* (k) time of last rxsync() */ - ... - struct netmap_slot slot[0]; /* array of slots */ -} -.Ed -.Pp -Implements transmit and receive rings, with read/write -pointers, metadata and and an array of -.Pa slots -describing the buffers. -.Pp -.It Dv struct netmap_slot (one per buffer) -.Bd -literal -struct netmap_slot { - uint32_t buf_idx; /* buffer index */ - uint16_t len; /* packet length */ - uint16_t flags; /* buf changed, etc. 
*/ - uint64_t ptr; /* address for indirect buffers */ -}; -.Ed -.Pp -Describes a packet buffer, which normally is identified by -an index and resides in the mmapped region. -.It Dv packet buffers -Fixed size (normally 2 KB) packet buffers allocated by the kernel. -.El -.Pp -The offset of the -.Pa struct netmap_if -in the mmapped region is indicated by the -.Pa nr_offset -field in the structure returned by -.Pa NIOCREGIF . -From there, all other objects are reachable through -relative references (offsets or indexes). -Macros and functions in -help converting them into actual pointers: -.Pp -.Dl struct netmap_if *nifp = NETMAP_IF(mem, arg.nr_offset); -.Dl struct netmap_ring *txr = NETMAP_TXRING(nifp, ring_index); -.Dl struct netmap_ring *rxr = NETMAP_RXRING(nifp, ring_index); -.Pp -.Dl char *buf = NETMAP_BUF(ring, buffer_index); -.Sh RINGS, BUFFERS AND DATA I/O -.Va Rings -are circular queues of packets with three indexes/pointers -.Va ( head , cur , tail ) ; -one slot is always kept empty. -The ring size -.Va ( num_slots ) -should not be assumed to be a power of two. -.br -(NOTE: older versions of netmap used head/count format to indicate -the content of a ring). -.Pp -.Va head -is the first slot available to userspace; -.br -.Va cur -is the wakeup point: -select/poll will unblock when -.Va tail -passes -.Va cur ; -.br -.Va tail -is the first slot reserved to the kernel. -.Pp -Slot indexes MUST only move forward; -for convenience, the function -.Dl nm_ring_next(ring, index) -returns the next index modulo the ring size. -.Pp -.Va head -and -.Va cur -are only modified by the user program; -.Va tail -is only modified by the kernel. -The kernel only reads/writes the -.Vt struct netmap_ring -slots and buffers -during the execution of a netmap-related system call. -The only exception are slots (and buffers) in the range -.Va tail\ . . . head-1 , -that are explicitly assigned to the kernel. -.Pp -.Ss TRANSMIT RINGS -On transmit rings, after a -.Nm -system call, slots in the range -.Va head\ . . . tail-1 -are available for transmission. -User code should fill the slots sequentially -and advance -.Va head -and -.Va cur -past slots ready to transmit. -.Va cur -may be moved further ahead if the user code needs -more slots before further transmissions (see -.Sx SCATTER GATHER I/O ) . -.Pp -At the next NIOCTXSYNC/select()/poll(), -slots up to -.Va head-1 -are pushed to the port, and -.Va tail -may advance if further slots have become available. -Below is an example of the evolution of a TX ring: -.Pp -.Bd -literal - after the syscall, slots between cur and tail are (a)vailable - head=cur tail - | | - v v - TX [.....aaaaaaaaaaa.............] - - user creates new packets to (T)ransmit - head=cur tail - | | - v v - TX [.....TTTTTaaaaaa.............] - - NIOCTXSYNC/poll()/select() sends packets and reports new slots - head=cur tail - | | - v v - TX [..........aaaaaaaaaaa........] -.Ed -.Pp -select() and poll() wlll block if there is no space in the ring, i.e. -.Dl ring->cur == ring->tail -and return when new slots have become available. -.Pp -High speed applications may want to amortize the cost of system calls -by preparing as many packets as possible before issuing them. -.Pp -A transmit ring with pending transmissions has -.Dl ring->head != ring->tail + 1 (modulo the ring size). -The function -.Va int nm_tx_pending(ring) -implements this test. -.Pp -.Ss RECEIVE RINGS -On receive rings, after a -.Nm -system call, the slots in the range -.Va head\& . . . tail-1 -contain received packets. 
-User code should process them and advance -.Va head -and -.Va cur -past slots it wants to return to the kernel. -.Va cur -may be moved further ahead if the user code wants to -wait for more packets -without returning all the previous slots to the kernel. -.Pp -At the next NIOCRXSYNC/select()/poll(), -slots up to -.Va head-1 -are returned to the kernel for further receives, and -.Va tail -may advance to report new incoming packets. -.br -Below is an example of the evolution of an RX ring: -.Bd -literal - after the syscall, there are some (h)eld and some (R)eceived slots - head cur tail - | | | - v v v - RX [..hhhhhhRRRRRRRR..........] - - user advances head and cur, releasing some slots and holding others - head cur tail - | | | - v v v - RX [..*****hhhRRRRRR...........] - - NICRXSYNC/poll()/select() recovers slots and reports new packets - head cur tail - | | | - v v v - RX [.......hhhRRRRRRRRRRRR....] -.Ed -.Pp -.Sh SLOTS AND PACKET BUFFERS -Normally, packets should be stored in the netmap-allocated buffers -assigned to slots when ports are bound to a file descriptor. -One packet is fully contained in a single buffer. -.Pp -The following flags affect slot and buffer processing: -.Bl -tag -width XXX -.It NS_BUF_CHANGED -it MUST be used when the buf_idx in the slot is changed. -This can be used to implement -zero-copy forwarding, see -.Sx ZERO-COPY FORWARDING . -.Pp -.It NS_REPORT -reports when this buffer has been transmitted. -Normally, -.Nm -notifies transmit completions in batches, hence signals -can be delayed indefinitely. This flag helps detecting -when packets have been send and a file descriptor can be closed. -.It NS_FORWARD -When a ring is in 'transparent' mode (see -.Sx TRANSPARENT MODE ) , -packets marked with this flags are forwarded to the other endpoint -at the next system call, thus restoring (in a selective way) -the connection between a NIC and the host stack. -.It NS_NO_LEARN -tells the forwarding code that the SRC MAC address for this -packet must not be used in the learning bridge code. -.It NS_INDIRECT -indicates that the packet's payload is in a user-supplied buffer, -whose user virtual address is in the 'ptr' field of the slot. -The size can reach 65535 bytes. -.br -This is only supported on the transmit ring of -.Nm VALE -ports, and it helps reducing data copies in the interconnection -of virtual machines. -.It NS_MOREFRAG -indicates that the packet continues with subsequent buffers; -the last buffer in a packet must have the flag clear. -.El -.Sh SCATTER GATHER I/O -Packets can span multiple slots if the -.Va NS_MOREFRAG -flag is set in all but the last slot. -The maximum length of a chain is 64 buffers. -This is normally used with -.Nm VALE -ports when connecting virtual machines, as they generate large -TSO segments that are not split unless they reach a physical device. -.Pp -NOTE: The length field always refers to the individual -fragment; there is no place with the total length of a packet. -.Pp -On receive rings the macro -.Va NS_RFRAGS(slot) -indicates the remaining number of slots for this packet, -including the current one. -Slots with a value greater than 1 also have NS_MOREFRAG set. -.Sh IOCTLS -.Nm -uses two ioctls (NIOCTXSYNC, NIOCRXSYNC) -for non-blocking I/O. They take no argument. 
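As a minimal sketch (not part of the original manual page text), the two synchronization ioctls are normally issued directly on the bound file descriptor with a NULL argument; the descriptor fd and the error handling below are illustrative assumptions:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <net/netmap_user.h>

    /* sketch: fd is assumed to be already bound to a port with NIOCREGIF */
    static void
    sync_rings(int fd)
    {
            if (ioctl(fd, NIOCTXSYNC, NULL) == -1)   /* push out queued tx slots */
                    perror("NIOCTXSYNC");
            if (ioctl(fd, NIOCRXSYNC, NULL) == -1)   /* collect newly received slots */
                    perror("NIOCRXSYNC");
    }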
-Two more ioctls (NIOCGINFO, NIOCREGIF) are used
-to query and configure ports, with the following argument:
-.Bd -literal
-struct nmreq {
-	char		nr_name[IFNAMSIZ]; /* (i) port name */
-	uint32_t	nr_version;	/* (i) API version */
-	uint32_t	nr_offset;	/* (o) nifp offset in mmap region */
-	uint32_t	nr_memsize;	/* (o) size of the mmap region */
-	uint32_t	nr_tx_slots;	/* (i/o) slots in tx rings */
-	uint32_t	nr_rx_slots;	/* (i/o) slots in rx rings */
-	uint16_t	nr_tx_rings;	/* (i/o) number of tx rings */
-	uint16_t	nr_rx_rings;	/* (i/o) number of rx rings */
-	uint16_t	nr_ringid;	/* (i/o) ring(s) we care about */
-	uint16_t	nr_cmd;		/* (i) special command */
-	uint16_t	nr_arg1;	/* (i/o) extra arguments */
-	uint16_t	nr_arg2;	/* (i/o) extra arguments */
-	uint32_t	nr_arg3;	/* (i/o) extra arguments */
-	uint32_t	nr_flags;	/* (i/o) open mode */
-	...
-};
-.Ed
-.Pp
-A file descriptor obtained through
-.Pa /dev/netmap
-also supports the ioctls supported by network devices, see
-.Xr netintro 4 .
-.Pp
-.Bl -tag -width XXXX
-.It Dv NIOCGINFO
-returns EINVAL if the named port does not support netmap.
-Otherwise, it returns 0 and (advisory) information
-about the port.
-Note that all the information below can change before the
-interface is actually put in netmap mode.
-.Pp
-.Bl -tag -width XX
-.It Pa nr_memsize
-indicates the size of the
-.Nm
-memory region. NICs in
-.Nm
-mode all share the same memory region,
-whereas
-.Nm VALE
-ports have independent regions for each port.
-.It Pa nr_tx_slots , nr_rx_slots
-indicate the size of transmit and receive rings.
-.It Pa nr_tx_rings , nr_rx_rings
-indicate the number of transmit
-and receive rings.
-Both ring numbers and sizes may be configured at runtime
-using interface-specific functions (e.g.
-.Xr ethtool
-).
-.El
-.It Dv NIOCREGIF
-binds the port named in
-.Va nr_name
-to the file descriptor. For a physical device this also switches it into
-.Nm
-mode, disconnecting
-it from the host stack.
-Multiple file descriptors can be bound to the same port,
-with proper synchronization left to the user.
-.Pp
-.Dv NIOCREGIF can also bind a file descriptor to one endpoint of a
-.Em netmap pipe ,
-consisting of two netmap ports with a crossover connection.
-A netmap pipe shares the same memory space as the parent port,
-and is meant to enable configurations where a master process acts
-as a dispatcher towards slave processes.
-.Pp
-To enable this function, the
-.Pa nr_arg1
-field of the structure can be used as a hint to the kernel to
-indicate how many pipes we expect to use, and reserve extra space
-in the memory region.
-.Pp
-On return, it gives the same info as NIOCGINFO,
-with
-.Pa nr_ringid
-and
-.Pa nr_flags
-indicating the identity of the rings controlled through the file
-descriptor.
-.Pp
-.Va nr_flags
-and
-.Va nr_ringid
-select which rings are controlled through this file descriptor.
-Possible values of
-.Pa nr_flags
-are indicated below, together with the naming schemes
-that application libraries (such as the
-.Nm nm_open
-indicated below) can use to indicate the specific set of rings.
-In the example below, "netmap:foo" is any valid netmap port name.
-.Pp
-.Bl -tag -width XXXXX
-.It NR_REG_ALL_NIC "netmap:foo"
-(default) all hardware ring pairs
-.It NR_REG_SW_NIC "netmap:foo^"
-the ``host rings'', connecting to the host stack.
-.It NR_REG_NIC_SW "netmap:foo+"
-all hardware rings and the host rings
-.It NR_REG_ONE_NIC "netmap:foo-i"
-only the i-th hardware ring pair, where the number is in
-.Pa nr_ringid ;
-.It NR_REG_PIPE_MASTER "netmap:foo{i"
-the master side of the netmap pipe whose identifier (i) is in
-.Pa nr_ringid ;
-.It NR_REG_PIPE_SLAVE "netmap:foo}i"
-the slave side of the netmap pipe whose identifier (i) is in
-.Pa nr_ringid .
-.Pp
-The identifier of a pipe must be thought of as part of the pipe name,
-and does not need to be sequential. On return the pipe
-will only have a single ring pair with index 0,
-irrespective of the value of i.
-.El
-.Pp
-By default, a
-.Xr poll 2
-or
-.Xr select 2
-call pushes out any pending packets on the transmit ring, even if
-no write events are specified.
-The feature can be disabled by or-ing
-.Va NETMAP_NO_TX_SYNC
-to the value written to
-.Va nr_ringid.
-When this feature is used,
-packets are transmitted only when
-.Va ioctl(NIOCTXSYNC)
-or select()/poll() are called with a write event (POLLOUT/wfdset) or a full ring.
-.Pp
-When registering a virtual interface that is dynamically created on a
-.Xr vale 4
-switch, we can specify the desired number of rings (1 by default,
-and currently up to 16) using the nr_tx_rings and nr_rx_rings fields.
-.It Dv NIOCTXSYNC
-tells the hardware of new packets to transmit, and updates the
-number of slots available for transmission.
-.It Dv NIOCRXSYNC
-tells the hardware of consumed packets, and asks for newly available
-packets.
-.El
-.Sh SELECT, POLL, EPOLL, KQUEUE.
-.Xr select 2
-and
-.Xr poll 2
-on a
-.Nm
-file descriptor process rings as indicated in
-.Sx TRANSMIT RINGS
-and
-.Sx RECEIVE RINGS ,
-respectively when write (POLLOUT) and read (POLLIN) events are requested.
-Both block if no slots are available in the ring
-.Va ( ring->cur == ring->tail ) .
-Depending on the platform,
-.Xr epoll 2
-and
-.Xr kqueue 2
-are supported too.
-.Pp
-Packets in transmit rings are normally pushed out
-(and buffers reclaimed) even without
-requesting write events. Passing the NETMAP_NO_TX_SYNC flag to
-.Em NIOCREGIF
-disables this feature.
-By default, receive rings are processed only if read
-events are requested. Passing the NETMAP_DO_RX_SYNC flag to
-.Em NIOCREGIF updates receive rings even without read events.
-Note that on epoll and kqueue, NETMAP_NO_TX_SYNC and NETMAP_DO_RX_SYNC
-only have an effect when some event is posted for the file descriptor.
-.Sh LIBRARIES
-The
-.Nm
-API is supposed to be used directly, both because of its simplicity and
-for efficient integration with applications.
-.Pp
-For convenience, the
-.Va net/netmap_user.h
-header provides a few macros and functions to ease creating
-a file descriptor and doing I/O with a
-.Nm
-port. These are loosely modeled after the
-.Xr pcap 3
-API, to ease porting of libpcap-based applications to
-.Nm .
-To use these extra functions, programs should
-.Dl #define NETMAP_WITH_LIBS
-before
-.Dl #include <net/netmap_user.h>
-.Pp
-The following functions are available:
-.Bl -tag -width XXXXX
-.It Va struct nm_desc * nm_open(const char *ifname, const struct nmreq *req, uint64_t flags, const struct nm_desc *arg)
-similar to
-.Xr pcap_open ,
-binds a file descriptor to a port.
-.Bl -tag -width XX
-.It Va ifname
-is a port name, in the form "netmap:XXX" for a NIC and "valeXXX:YYY" for a
-.Nm VALE
-port.
-.It Va req
-provides the initial values for the argument to the NIOCREGIF ioctl.
-The nr_flags and nr_ringid values are overwritten by parsing
-ifname and flags, and other fields can be overridden through
-the other two arguments.
-.It Va arg -points to a struct nm_desc containing arguments (e.g. from a previously -open file descriptor) that should override the defaults. -The fields are used as described below -.It Va flags -can be set to a combination of the following flags: -.Va NETMAP_NO_TX_POLL , -.Va NETMAP_DO_RX_POLL -(copied into nr_ringid); -.Va NM_OPEN_NO_MMAP (if arg points to the same memory region, -avoids the mmap and uses the values from it); -.Va NM_OPEN_IFNAME (ignores ifname and uses the values in arg); -.Va NM_OPEN_ARG1 , -.Va NM_OPEN_ARG2 , -.Va NM_OPEN_ARG3 (uses the fields from arg); -.Va NM_OPEN_RING_CFG (uses the ring number and sizes from arg). -.El -.It Va int nm_close(struct nm_desc *d) -closes the file descriptor, unmaps memory, frees resources. -.It Va int nm_inject(struct nm_desc *d, const void *buf, size_t size) -similar to pcap_inject(), pushes a packet to a ring, returns the size -of the packet is successful, or 0 on error; -.It Va int nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) -similar to pcap_dispatch(), applies a callback to incoming packets -.It Va u_char * nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) -similar to pcap_next(), fetches the next packet -.Pp -.El -.Sh SUPPORTED DEVICES -.Nm -natively supports the following devices: -.Pp -On FreeBSD: -.Xr em 4 , -.Xr igb 4 , -.Xr ixgbe 4 , -.Xr lem 4 , -.Xr re 4 . -.Pp -On Linux -.Xr e1000 4 , -.Xr e1000e 4 , -.Xr igb 4 , -.Xr ixgbe 4 , -.Xr mlx4 4 , -.Xr forcedeth 4 , -.Xr r8169 4 . -.Pp -NICs without native support can still be used in -.Nm -mode through emulation. Performance is inferior to native netmap -mode but still significantly higher than sockets, and approaching -that of in-kernel solutions such as Linux's -.Xr pktgen . -.Pp -Emulation is also available for devices with native netmap support, -which can be used for testing or performance comparison. -The sysctl variable -.Va dev.netmap.admode -globally controls how netmap mode is implemented. -.Sh SYSCTL VARIABLES AND MODULE PARAMETERS -Some aspect of the operation of -.Nm -are controlled through sysctl variables on FreeBSD -.Em ( dev.netmap.* ) -and module parameters on Linux -.Em ( /sys/module/netmap_lin/parameters/* ) : -.Pp -.Bl -tag -width indent -.It Va dev.netmap.admode: 0 -Controls the use of native or emulated adapter mode. -0 uses the best available option, 1 forces native and -fails if not available, 2 forces emulated hence never fails. -.It Va dev.netmap.generic_ringsize: 1024 -Ring size used for emulated netmap mode -.It Va dev.netmap.generic_mit: 100000 -Controls interrupt moderation for emulated mode -.It Va dev.netmap.mmap_unreg: 0 -.It Va dev.netmap.fwd: 0 -Forces NS_FORWARD mode -.It Va dev.netmap.flags: 0 -.It Va dev.netmap.txsync_retry: 2 -.It Va dev.netmap.no_pendintr: 1 -Forces recovery of transmit buffers on system calls -.It Va dev.netmap.mitigate: 1 -Propagates interrupt mitigation to user processes -.It Va dev.netmap.no_timestamp: 0 -Disables the update of the timestamp in the netmap ring -.It Va dev.netmap.verbose: 0 -Verbose kernel messages -.It Va dev.netmap.buf_num: 163840 -.It Va dev.netmap.buf_size: 2048 -.It Va dev.netmap.ring_num: 200 -.It Va dev.netmap.ring_size: 36864 -.It Va dev.netmap.if_num: 100 -.It Va dev.netmap.if_size: 1024 -Sizes and number of objects (netmap_if, netmap_ring, buffers) -for the global memory region. The only parameter worth modifying is -.Va dev.netmap.buf_num -as it impacts the total amount of memory used by netmap. 
-.It Va dev.netmap.buf_curr_num: 0 -.It Va dev.netmap.buf_curr_size: 0 -.It Va dev.netmap.ring_curr_num: 0 -.It Va dev.netmap.ring_curr_size: 0 -.It Va dev.netmap.if_curr_num: 0 -.It Va dev.netmap.if_curr_size: 0 -Actual values in use. -.It Va dev.netmap.bridge_batch: 1024 -Batch size used when moving packets across a -.Nm VALE -switch. Values above 64 generally guarantee good -performance. -.El -.Sh SYSTEM CALLS -.Nm -uses -.Xr select 2 , -.Xr poll 2 , -.Xr epoll -and -.Xr kqueue -to wake up processes when significant events occur, and -.Xr mmap 2 -to map memory. -.Xr ioctl 2 -is used to configure ports and -.Nm VALE switches . -.Pp -Applications may need to create threads and bind them to -specific cores to improve performance, using standard -OS primitives, see -.Xr pthread 3 . -In particular, -.Xr pthread_setaffinity_np 3 -may be of use. -.Sh CAVEATS -No matter how fast the CPU and OS are, -achieving line rate on 10G and faster interfaces -requires hardware with sufficient performance. -Several NICs are unable to sustain line rate with -small packet sizes. Insufficient PCIe or memory bandwidth -can also cause reduced performance. -.Pp -Another frequent reason for low performance is the use -of flow control on the link: a slow receiver can limit -the transmit speed. -Be sure to disable flow control when running high -speed experiments. -.Pp -.Ss SPECIAL NIC FEATURES -.Nm -is orthogonal to some NIC features such as -multiqueue, schedulers, packet filters. -.Pp -Multiple transmit and receive rings are supported natively -and can be configured with ordinary OS tools, -such as -.Xr ethtool -or -device-specific sysctl variables. -The same goes for Receive Packet Steering (RPS) -and filtering of incoming traffic. -.Pp -.Nm -.Em does not use -features such as -.Em checksum offloading , TCP segmentation offloading , -.Em encryption , VLAN encapsulation/decapsulation , -etc. . -When using netmap to exchange packets with the host stack, -make sure to disable these features. -.Sh EXAMPLES -.Ss TEST PROGRAMS -.Nm -comes with a few programs that can be used for testing or -simple applications. -See the -.Va examples/ -directory in -.Nm -distributions, or -.Va tools/tools/netmap/ -directory in FreeBSD distributions. -.Pp -.Xr pkt-gen -is a general purpose traffic source/sink. -.Pp -As an example -.Dl pkt-gen -i ix0 -f tx -l 60 -can generate an infinite stream of minimum size packets, and -.Dl pkt-gen -i ix0 -f rx -is a traffic sink. -Both print traffic statistics, to help monitor -how the system performs. -.Pp -.Xr pkt-gen -has many options can be uses to set packet sizes, addresses, -rates, and use multiple send/receive threads and cores. -.Pp -.Xr bridge -is another test program which interconnects two -.Nm -ports. It can be used for transparent forwarding between -interfaces, as in -.Dl bridge -i ix0 -i ix1 -or even connect the NIC to the host stack using netmap -.Dl bridge -i ix0 -i ix0 -.Ss USING THE NATIVE API -The following code implements a traffic generator -.Pp -.Bd -literal -compact -#include -... 
-void sender(void) -{ - struct netmap_if *nifp; - struct netmap_ring *ring; - struct nmreq nmr; - struct pollfd fds; - - fd = open("/dev/netmap", O_RDWR); - bzero(&nmr, sizeof(nmr)); - strcpy(nmr.nr_name, "ix0"); - nmr.nm_version = NETMAP_API; - ioctl(fd, NIOCREGIF, &nmr); - p = mmap(0, nmr.nr_memsize, fd); - nifp = NETMAP_IF(p, nmr.nr_offset); - ring = NETMAP_TXRING(nifp, 0); - fds.fd = fd; - fds.events = POLLOUT; - for (;;) { - poll(&fds, 1, -1); - while (!nm_ring_empty(ring)) { - i = ring->cur; - buf = NETMAP_BUF(ring, ring->slot[i].buf_index); - ... prepare packet in buf ... - ring->slot[i].len = ... packet length ... - ring->head = ring->cur = nm_ring_next(ring, i); - } - } -} -.Ed -.Ss HELPER FUNCTIONS -A simple receiver can be implemented using the helper functions -.Bd -literal -compact -#define NETMAP_WITH_LIBS -#include -... -void receiver(void) -{ - struct nm_desc *d; - struct pollfd fds; - u_char *buf; - struct nm_pkthdr h; - ... - d = nm_open("netmap:ix0", NULL, 0, 0); - fds.fd = NETMAP_FD(d); - fds.events = POLLIN; - for (;;) { - poll(&fds, 1, -1); - while ( (buf = nm_nextpkt(d, &h)) ) - consume_pkt(buf, h->len); - } - nm_close(d); -} -.Ed -.Ss ZERO-COPY FORWARDING -Since physical interfaces share the same memory region, -it is possible to do packet forwarding between ports -swapping buffers. The buffer from the transmit ring is used -to replenish the receive ring: -.Bd -literal -compact - uint32_t tmp; - struct netmap_slot *src, *dst; - ... - src = &src_ring->slot[rxr->cur]; - dst = &dst_ring->slot[txr->cur]; - tmp = dst->buf_idx; - dst->buf_idx = src->buf_idx; - dst->len = src->len; - dst->flags = NS_BUF_CHANGED; - src->buf_idx = tmp; - src->flags = NS_BUF_CHANGED; - rxr->head = rxr->cur = nm_ring_next(rxr, rxr->cur); - txr->head = txr->cur = nm_ring_next(txr, txr->cur); - ... -.Ed -.Ss ACCESSING THE HOST STACK -The host stack is for all practical purposes just a regular ring pair, -which you can access with the netmap API (e.g. with -.Dl nm_open("netmap:eth0^", ... ) ; -All packets that the host would send to an interface in -.Nm -mode end up into the RX ring, whereas all packets queued to the -TX ring are send up to the host stack. -.Ss VALE SWITCH -A simple way to test the performance of a -.Nm VALE -switch is to attach a sender and a receiver to it, -e.g. running the following in two different terminals: -.Dl pkt-gen -i vale1:a -f rx # receiver -.Dl pkt-gen -i vale1:b -f tx # sender -The same example can be used to test netmap pipes, by simply -changing port names, e.g. -.Dl pkt-gen -i vale:x{3 -f rx # receiver on the master side -.Dl pkt-gen -i vale:x}3 -f tx # sender on the slave side -.Pp -The following command attaches an interface and the host stack -to a switch: -.Dl vale-ctl -h vale2:em0 -Other -.Nm -clients attached to the same switch can now communicate -with the network card or the host. 
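Complementing the examples above, the following sketch (not from the original manual page) combines the helper functions to drain traffic that the host stack queued on the host rings and re-inject it on the NIC rings; the interface name eth0 and the absence of error checking are illustrative assumptions:

    #define NETMAP_WITH_LIBS
    #include <net/netmap_user.h>
    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <poll.h>

    /* sketch: copy packets queued by the host stack ("eth0^") to the wire ("eth0") */
    void
    host_to_wire(void)
    {
            struct nm_desc *host = nm_open("netmap:eth0^", NULL, 0, NULL);
            struct nm_desc *nic = nm_open("netmap:eth0", NULL, 0, NULL);
            struct pollfd fds = { .fd = NETMAP_FD(host), .events = POLLIN };
            struct nm_pkthdr h;
            u_char *buf;

            for (;;) {
                    poll(&fds, 1, -1);              /* wait for host-originated packets */
                    while ((buf = nm_nextpkt(host, &h)) != NULL)
                            nm_inject(nic, buf, h.len);     /* copy into a free tx slot */
                    ioctl(NETMAP_FD(nic), NIOCTXSYNC, NULL); /* push them out */
            }
            /* not reached: nm_close(host); nm_close(nic); */
    }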
-.Pp -.Sh SEE ALSO -.Pp -http://info.iet.unipi.it/~luigi/netmap/ -.Pp -Luigi Rizzo, Revisiting network I/O APIs: the netmap framework, -Communications of the ACM, 55 (3), pp.45-51, March 2012 -.Pp -Luigi Rizzo, netmap: a novel framework for fast packet I/O, -Usenix ATC'12, June 2012, Boston -.Pp -Luigi Rizzo, Giuseppe Lettieri, -VALE, a switched ethernet for virtual machines, -ACM CoNEXT'12, December 2012, Nice -.Pp -Luigi Rizzo, Giuseppe Lettieri, Vincenzo Maffione, -Speeding up packet I/O in virtual machines, -ACM/IEEE ANCS'13, October 2013, San Jose -.Sh AUTHORS -.An -nosplit -The -.Nm -framework has been originally designed and implemented at the -Universita` di Pisa in 2011 by -.An Luigi Rizzo , -and further extended with help from -.An Matteo Landi , -.An Gaetano Catalli , -.An Giuseppe Lettieri , -.An Vincenzo Maffione . -.Pp -.Nm -and -.Nm VALE -have been funded by the European Commission within FP7 Projects -CHANGE (257422) and OPENLAB (287581). diff --git a/netmap/sys/dev/netmap/if_em_netmap.h b/netmap/sys/dev/netmap/if_em_netmap.h deleted file mode 100644 index 9757c31..0000000 --- a/netmap/sys/dev/netmap/if_em_netmap.h +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 238985 2012-08-02 11:59:43Z luigi $ - * - * netmap support for: em. - * - * For more details on netmap support please see ixgbe_netmap.h - */ - - -#include -#include -#include -#include /* vtophys ? */ -#include - - -// XXX do we need to block/unblock the tasks ? 
-static void -em_netmap_block_tasks(struct adapter *adapter) -{ - if (adapter->msix > 1) { /* MSIX */ - int i; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - - for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) { - taskqueue_block(txr->tq); - taskqueue_drain(txr->tq, &txr->tx_task); - taskqueue_block(rxr->tq); - taskqueue_drain(rxr->tq, &rxr->rx_task); - } - } else { /* legacy */ - taskqueue_block(adapter->tq); - taskqueue_drain(adapter->tq, &adapter->link_task); - taskqueue_drain(adapter->tq, &adapter->que_task); - } -} - - -static void -em_netmap_unblock_tasks(struct adapter *adapter) -{ - if (adapter->msix > 1) { - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - int i; - - for (i = 0; i < adapter->num_queues; i++) { - taskqueue_unblock(txr->tq); - taskqueue_unblock(rxr->tq); - } - } else { /* legacy */ - taskqueue_unblock(adapter->tq); - } -} - - -/* - * Register/unregister. We are already under netmap lock. - */ -static int -em_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct adapter *adapter = ifp->if_softc; - - EM_CORE_LOCK(adapter); - em_disable_intr(adapter); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - - em_netmap_block_tasks(adapter); - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - em_init_locked(adapter); /* also enable intr */ - em_netmap_unblock_tasks(adapter); - EM_CORE_UNLOCK(adapter); - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -em_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* generate an interrupt approximately every half ring */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = &adapter->tx_rings[kring->ring_id]; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - /* - * First part: process new packets to send. - */ - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - struct e1000_tx_desc *curr = &txr->tx_base[nic_i]; - struct em_buffer *txbuf = &txr->tx_buffers[nic_i]; - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? - E1000_TXD_CMD_RS : 0; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - curr->buffer_addr = htole64(paddr); - /* buffer has changed, reload map */ - netmap_reload_map(txr->txtag, txbuf->map, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. 
*/ - curr->upper.data = 0; - curr->lower.data = htole32(adapter->txd_cmd | len | - (E1000_TXD_CMD_EOP | flags) ); - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_PREWRITE); - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - /* synchronize the NIC ring */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* (re)start the tx unit up to slot nic_i (excluded) */ - E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i); - } - - /* - * Second part: reclaim buffers for completed transmissions. - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - /* record completed transmissions using TDH */ - nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id)); - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - if (nic_i != txr->next_to_clean) { - txr->next_to_clean = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - } - - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - */ -static int -em_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id]; - - if (head > lim) - return netmap_ring_reinit(kring); - - /* XXX check sync modes */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* - * First part: import newly received packets. - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = rxr->next_to_check; - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { // XXX no need to count - struct e1000_rx_desc *curr = &rxr->rx_base[nic_i]; - uint32_t staterr = le32toh(curr->status); - - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - ring->slot[nm_i].len = le16toh(curr->length); - ring->slot[nm_i].flags = slot_flags; - bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map, - BUS_DMASYNC_POSTREAD); - nm_i = nm_next(nm_i, lim); - /* make sure next_to_refresh follows next_to_check */ - rxr->next_to_refresh = nic_i; // XXX - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - rxr->next_to_check = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. 
- */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - struct e1000_rx_desc *curr = &rxr->rx_base[nic_i]; - struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i]; - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - curr->buffer_addr = htole64(paddr); - netmap_reload_map(rxr->rxtag, rxbuf->map, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->status = 0; - bus_dmamap_sync(rxr->rxtag, rxbuf->map, - BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -static void -em_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = em_netmap_txsync; - na.nm_rxsync = em_netmap_rxsync; - na.nm_register = em_netmap_reg; - na.num_tx_rings = na.num_rx_rings = adapter->num_queues; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/sys/dev/netmap/if_igb_netmap.h b/netmap/sys/dev/netmap/if_igb_netmap.h deleted file mode 100644 index 9d73961..0000000 --- a/netmap/sys/dev/netmap/if_igb_netmap.h +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* - * $FreeBSD: head/sys/dev/netmap/if_igb_netmap.h 256200 2013-10-09 17:32:52Z jfv $ - * - * Netmap support for igb, partly contributed by Ahmed Kooli - * For details on netmap support please see ixgbe_netmap.h - */ - - -#include -#include -#include -#include /* vtophys ? */ -#include - -/* - * Adaptation to different versions of the driver. - */ - -#ifndef IGB_MEDIA_RESET -/* at the same time as IGB_MEDIA_RESET was defined, the - * tx buffer descriptor was renamed, so use this to revert - * back to the old name. - */ -#define igb_tx_buf igb_tx_buffer -#endif - - -/* - * Register/unregister. We are already under netmap lock. - */ -static int -igb_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct adapter *adapter = ifp->if_softc; - - IGB_CORE_LOCK(adapter); - igb_disable_intr(adapter); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - igb_init_locked(adapter); /* also enable intr */ - IGB_CORE_UNLOCK(adapter); - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -igb_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* generate an interrupt approximately every half ring */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = &adapter->tx_rings[kring->ring_id]; - /* 82575 needs the queue index added */ - u32 olinfo_status = - (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - /* - * First part: process new packets to send. - */ - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - union e1000_adv_tx_desc *curr = - (union e1000_adv_tx_desc *)&txr->tx_base[nic_i]; - struct igb_tx_buf *txbuf = &txr->tx_buffers[nic_i]; - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? - E1000_ADVTXD_DCMD_RS : 0; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(txr->txtag, txbuf->map, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. 
*/ - curr->read.buffer_addr = htole64(paddr); - // XXX check olinfo and cmd_type_len - curr->read.olinfo_status = - htole32(olinfo_status | - (len<< E1000_ADVTXD_PAYLEN_SHIFT)); - curr->read.cmd_type_len = - htole32(len | E1000_ADVTXD_DTYP_DATA | - E1000_ADVTXD_DCMD_IFCS | - E1000_ADVTXD_DCMD_DEXT | - E1000_ADVTXD_DCMD_EOP | flags); - - /* make sure changes to the buffer are synced */ - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_PREWRITE); - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - /* Set the watchdog XXX ? */ - txr->queue_status = IGB_QUEUE_WORKING; - txr->watchdog_time = ticks; - - /* synchronize the NIC ring */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* (re)start the tx unit up to slot nic_i (excluded) */ - E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i); - } - - /* - * Second part: reclaim buffers for completed transmissions. - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - /* record completed transmissions using TDH */ - nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id)); - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - txr->next_to_clean = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - */ -static int -igb_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id]; - - if (head > lim) - return netmap_ring_reinit(kring); - - /* XXX check sync modes */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* - * First part: import newly received packets. - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = rxr->next_to_check; - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { - union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i]; - uint32_t staterr = le32toh(curr->wb.upper.status_error); - - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - ring->slot[nm_i].len = le16toh(curr->wb.upper.length); - ring->slot[nm_i].flags = slot_flags; - bus_dmamap_sync(rxr->ptag, - rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - rxr->next_to_check = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. 
- */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i]; - struct igb_rx_buf *rxbuf = &rxr->rx_buffers[nic_i]; - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(rxr->ptag, rxbuf->pmap, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->wb.upper.status_error = 0; - curr->read.pkt_addr = htole64(paddr); - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -static void -igb_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = igb_netmap_txsync; - na.nm_rxsync = igb_netmap_rxsync; - na.nm_register = igb_netmap_reg; - na.num_tx_rings = na.num_rx_rings = adapter->num_queues; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/sys/dev/netmap/if_lem_netmap.h b/netmap/sys/dev/netmap/if_lem_netmap.h deleted file mode 100644 index a53310a..0000000 --- a/netmap/sys/dev/netmap/if_lem_netmap.h +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - - -/* - * $FreeBSD: head/sys/dev/netmap/if_lem_netmap.h 231881 2012-02-17 14:09:04Z luigi $ - * - * netmap support for: lem - * - * For details on netmap support please see ixgbe_netmap.h - */ - - -#include -#include -#include -#include /* vtophys ? */ -#include - - -/* - * Register/unregister. We are already under netmap lock. - */ -static int -lem_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct adapter *adapter = ifp->if_softc; - - EM_CORE_LOCK(adapter); - - lem_disable_intr(adapter); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - -#ifndef EM_LEGACY_IRQ // XXX do we need this ? - taskqueue_block(adapter->tq); - taskqueue_drain(adapter->tq, &adapter->rxtx_task); - taskqueue_drain(adapter->tq, &adapter->link_task); -#endif /* !EM_LEGCY_IRQ */ - - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - lem_init_locked(adapter); /* also enable intr */ - -#ifndef EM_LEGACY_IRQ - taskqueue_unblock(adapter->tq); // XXX do we need this ? -#endif /* !EM_LEGCY_IRQ */ - - EM_CORE_UNLOCK(adapter); - - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -lem_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* generate an interrupt approximately every half ring */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - /* - * First part: process new packets to send. - */ - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - while (nm_i != head) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i]; - struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i]; - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? - E1000_TXD_CMD_RS : 0; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - curr->buffer_addr = htole64(paddr); - netmap_reload_map(adapter->txtag, txbuf->map, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - curr->upper.data = 0; - curr->lower.data = htole32(adapter->txd_cmd | len | - (E1000_TXD_CMD_EOP | flags) ); - bus_dmamap_sync(adapter->txtag, txbuf->map, - BUS_DMASYNC_PREWRITE); - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - /* synchronize the NIC ring */ - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* (re)start the tx unit up to slot nic_i (excluded) */ - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i); - } - - /* - * Second part: reclaim buffers for completed transmissions. 
- */ - if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - kring->last_reclaim = ticks; - /* record completed transmissions using TDH */ - nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0)); - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - adapter->next_tx_to_clean = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - */ -static int -lem_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - - if (head > lim) - return netmap_ring_reinit(kring); - - /* XXX check sync modes */ - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* - * First part: import newly received packets. - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = adapter->next_rx_desc_to_check; - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { - struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i]; - uint32_t staterr = le32toh(curr->status); - int len; - - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - len = le16toh(curr->length) - 4; // CRC - if (len < 0) { - D("bogus pkt size %d nic idx %d", len, nic_i); - len = 0; - } - ring->slot[nm_i].len = len; - ring->slot[nm_i].flags = slot_flags; - bus_dmamap_sync(adapter->rxtag, - adapter->rx_buffer_area[nic_i].map, - BUS_DMASYNC_POSTREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - ND("%d new packets at nic %d nm %d tail %d", - n, - adapter->next_rx_desc_to_check, - netmap_idx_n2k(kring, adapter->next_rx_desc_to_check), - kring->nr_hwtail); - adapter->next_rx_desc_to_check = nic_i; - // ifp->if_ipackets += n; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. 
- */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i]; - struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i]; - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - curr->buffer_addr = htole64(paddr); - netmap_reload_map(adapter->rxtag, rxbuf->map, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->status = 0; - bus_dmamap_sync(adapter->rxtag, rxbuf->map, - BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -static void -lem_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = lem_netmap_txsync; - na.nm_rxsync = lem_netmap_rxsync; - na.nm_register = lem_netmap_reg; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/sys/dev/netmap/if_nfe_netmap.h b/netmap/sys/dev/netmap/if_nfe_netmap.h deleted file mode 100644 index c47e02b..0000000 --- a/netmap/sys/dev/netmap/if_nfe_netmap.h +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 231881 2012-02-17 14:09:04Z luigi $ - * - * netmap support for: nfe XXX not yet tested. 
- * - * For more details on netmap support please see ixgbe_netmap.h - */ - - -#include -#include -#include -#include - -#include - - -static int -nfe_netmap_init_buffers(struct nfe_softc *sc) -{ - struct netmap_adapter *na = NA(sc->nfe_ifp); - struct netmap_slot *slot; - int i, l, n, max_avail; - struct nfe_desc32 *desc32 = NULL; - struct nfe_desc64 *desc64 = NULL; - void *addr; - uint64_t paddr; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return 0; - } - - slot = netmap_reset(na, NR_TX, 0, 0); - if (!slot) - return 0; // XXX cannot happen - // XXX init the tx ring - n = NFE_TX_RING_COUNT; - for (i = 0; i < n; i++) { - l = netmap_idx_n2k(&na->tx_rings[0], i); - addr = PNMB(slot + l, &paddr); - netmap_reload_map(sc->txq.tx_data_tag, - sc->txq.data[l].tx_data_map, addr); - slot[l].flags = 0; - if (sc->nfe_flags & NFE_40BIT_ADDR) { - desc64 = &sc->txq.desc64[l]; - desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr)); - desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr)); - desc64->vtag = 0; - desc64->length = htole16(0); - desc64->flags = htole16(0); - } else { - desc32 = &sc->txq.desc32[l]; - desc32->physaddr = htole32(NFE_ADDR_LO(paddr)); - desc32->length = htole16(0); - desc32->flags = htole16(0); - } - } - - slot = netmap_reset(na, NR_RX, 0, 0); - // XXX init the rx ring - /* - * preserve buffers still owned by the driver (and keep one empty). - */ - n = NFE_RX_RING_COUNT; - max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]); - for (i = 0; i < n; i++) { - uint16_t flags; - l = netmap_idx_n2k(&na->rx_rings[0], i); - addr = PNMB(slot + l, &paddr); - flags = (i < max_avail) ? NFE_RX_READY : 0; - if (sc->nfe_flags & NFE_40BIT_ADDR) { - desc64 = &sc->rxq.desc64[l]; - desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr)); - desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr)); - desc64->vtag = 0; - desc64->length = htole16(NETMAP_BUF_SIZE); - desc64->flags = htole16(NFE_RX_READY); - } else { - desc32 = &sc->rxq.desc32[l]; - desc32->physaddr = htole32(NFE_ADDR_LO(paddr)); - desc32->length = htole16(NETMAP_BUF_SIZE); - desc32->flags = htole16(NFE_RX_READY); - } - - netmap_reload_map(sc->rxq.rx_data_tag, - sc->rxq.data[l].rx_data_map, addr); - bus_dmamap_sync(sc->rxq.rx_data_tag, - sc->rxq.data[l].rx_data_map, BUS_DMASYNC_PREREAD); - } - - return 1; -} - - -/* - * Register/unregister. We are already under netmap lock. - */ -static int -nfe_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct nfe_softc *sc = ifp->if_softc; - - NFE_LOCK(sc); - nfe_stop(ifp); /* also clear IFF_DRV_RUNNING */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - nfe_init_locked(sc); /* also enable intr */ - NFE_UNLOCK(sc); - return (0); -} - - -/* - * Reconcile kernel and user view of the transmit ring. 
- */ -static int -nfe_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* generate an interrupt approximately every half ring */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct nfe_softc *sc = ifp->if_softc; - struct nfe_desc32 *desc32 = NULL; - struct nfe_desc64 *desc64 = NULL; - - bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map, - BUS_DMASYNC_POSTREAD); - - /* - * First part: process new packets to send. - */ - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - /* slot is the current slot in the netmap ring */ - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(sc->txq.tx_data_tag, - sc->txq.data[l].tx_data_map, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - if (sc->nfe_flags & NFE_40BIT_ADDR) { - desc64 = &sc->txq.desc64[l]; - desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr)); - desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr)); - desc64->vtag = 0; - desc64->length = htole16(len - 1); - desc64->flags = - htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V2); - } else { - desc32 = &sc->txq.desc32[l]; - desc32->physaddr = htole32(NFE_ADDR_LO(paddr)); - desc32->length = htole16(len - 1); - desc32->flags = - htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V1); - } - - bus_dmamap_sync(sc->txq.tx_data_tag, - sc->txq.data[l].tx_data_map, BUS_DMASYNC_PREWRITE); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - sc->txq.cur = nic_i; - - bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* XXX something missing ? where is the last pkt marker ? */ - NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl); - } - - /* - * Second part: reclaim buffers for completed transmissions. - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - u_int nic_cur = sc->txq.cur; - nic_i = sc->txq.next; - for (n = 0; nic_i != nic_cur; n++, NFE_INC(nic_i, NFE_TX_RING_COUNT)) { - uint16_t flags; - if (sc->nfe_flags & NFE_40BIT_ADDR) { - desc64 = &sc->txq.desc64[l]; - flags = le16toh(desc64->flags); - } else { - desc32 = &sc->txq.desc32[l]; - flags = le16toh(desc32->flags); - } - if (flags & NFE_TX_VALID) - break; - } - if (n > 0) { - sc->txq.next = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - } - - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. 
- */ -static int -nfe_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct nfe_softc *sc = ifp->if_softc; - struct nfe_desc32 *desc32; - struct nfe_desc64 *desc64; - - if (head > lim) - return netmap_ring_reinit(kring); - - bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* - * First part: import newly received packets. - */ - if (netmap_no_pendintr || force_update) { - uint16_t flags, len; - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = sc->rxq.cur; - nm_i = netmap_idx_n2k(kring, nic_i); - for (n = 0; ; n++) { - if (sc->nfe_flags & NFE_40BIT_ADDR) { - desc64 = &sc->rxq.desc64[sc->rxq.cur]; - flags = le16toh(desc64->flags); - len = le16toh(desc64->length) & NFE_RX_LEN_MASK; - } else { - desc32 = &sc->rxq.desc32[sc->rxq.cur]; - flags = le16toh(desc32->flags); - len = le16toh(desc32->length) & NFE_RX_LEN_MASK; - } - - if (flags & NFE_RX_READY) - break; - - ring->slot[nm_i].len = len; - ring->slot[nm_i].flags = slot_flags; - bus_dmamap_sync(sc->rxq.rx_data_tag, - sc->rxq.data[nic_i].rx_data_map, - BUS_DMASYNC_POSTREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - sc->rxq.cur = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. 
- */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(sc->rxq.rx_data_tag, - sc->rxq.data[l].rx_data_map, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - if (sc->nfe_flags & NFE_40BIT_ADDR) { - desc64 = &sc->rxq.desc64[nic_i]; - desc64->physaddr[0] = - htole32(NFE_ADDR_HI(paddr)); - desc64->physaddr[1] = - htole32(NFE_ADDR_LO(paddr)); - desc64->length = htole16(NETMAP_BUF_SIZE); - desc64->flags = htole16(NFE_RX_READY); - } else { - desc32 = &sc->rxq.desc32[nic_i]; - desc32->physaddr = - htole32(NFE_ADDR_LO(paddr)); - desc32->length = htole16(NETMAP_BUF_SIZE); - desc32->flags = htole16(NFE_RX_READY); - } - - bus_dmamap_sync(sc->rxq.rx_data_tag, - sc->rxq.data[nic_i].rx_data_map, - BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -static void -nfe_netmap_attach(struct nfe_softc *sc) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = sc->nfe_ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = NFE_TX_RING_COUNT; - na.num_rx_desc = NFE_RX_RING_COUNT; - na.nm_txsync = nfe_netmap_txsync; - na.nm_rxsync = nfe_netmap_rxsync; - na.nm_register = nfe_netmap_reg; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na, 1); -} - -/* end of file */ diff --git a/netmap/sys/dev/netmap/if_re_netmap.h b/netmap/sys/dev/netmap/if_re_netmap.h deleted file mode 100644 index 5b96d23..0000000 --- a/netmap/sys/dev/netmap/if_re_netmap.h +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* - * $FreeBSD: head/sys/dev/netmap/if_re_netmap.h 234225 2012-04-13 15:33:12Z luigi $ - * - * netmap support for: re - * - * For more details on netmap support please see ixgbe_netmap.h - */ - - -#include -#include -#include -#include /* vtophys ? */ -#include - - -/* - * Register/unregister. We are already under netmap lock. - */ -static int -re_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct rl_softc *adapter = ifp->if_softc; - - RL_LOCK(adapter); - re_stop(adapter); /* also clears IFF_DRV_RUNNING */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - re_init_locked(adapter); /* also enables intr */ - RL_UNLOCK(adapter); - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -re_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - - /* device-specific */ - struct rl_softc *sc = ifp->if_softc; - struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc; - - bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, - sc->rl_ldata.rl_tx_list_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); // XXX extra postwrite ? - - /* - * First part: process new packets to send. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = sc->rl_ldata.rl_tx_prodidx; - // XXX or netmap_idx_k2n(kring, nm_i); - - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[nic_i]; - int cmd = slot->len | RL_TDESC_CMD_EOF | - RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ; - - NM_CHECK_ADDR_LEN(addr, len); - - if (nic_i == lim) /* mark end of ring */ - cmd |= RL_TDESC_CMD_EOR; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); - desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); - netmap_reload_map(sc->rl_ldata.rl_tx_mtag, - txd[nic_i].tx_dmamap, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - desc->rl_cmdstat = htole32(cmd); - - /* make sure changes to the buffer are synced */ - bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, - txd[nic_i].tx_dmamap, - BUS_DMASYNC_PREWRITE); - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - sc->rl_ldata.rl_tx_prodidx = nic_i; - kring->nr_hwcur = head; - - /* synchronize the NIC ring */ - bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, - sc->rl_ldata.rl_tx_list_map, - BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); - - /* start ? */ - CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); - } - - /* - * Second part: reclaim buffers for completed transmissions. 
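The fill loop above builds the RealTek command/status word for each descriptor: OWN, SOF and EOF on every slot (netmap hands the NIC single-fragment packets), plus EOR on the last descriptor so the chip wraps back to slot 0. A small self-contained sketch of that composition, using made-up bit values in place of the real RL_TDESC_CMD_* constants:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the RL_TDESC_CMD_* bits. */
#define TOY_CMD_OWN  0x80000000u	/* descriptor owned by the NIC */
#define TOY_CMD_SOF  0x40000000u	/* start of frame */
#define TOY_CMD_EOF  0x20000000u	/* end of frame */
#define TOY_CMD_EOR  0x10000000u	/* end of ring: wrap to slot 0 */

static uint32_t
toy_tx_cmd(uint32_t len, unsigned nic_i, unsigned lim)
{
	uint32_t cmd = len | TOY_CMD_OWN | TOY_CMD_SOF | TOY_CMD_EOF;

	if (nic_i == lim)	/* last descriptor: tell the NIC to wrap */
		cmd |= TOY_CMD_EOR;
	return cmd;
}

int
main(void)
{
	printf("mid ring:  %#x\n", toy_tx_cmd(60, 10, 255));
	printf("last slot: %#x\n", toy_tx_cmd(60, 255, 255));
	return 0;
}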
- */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - nic_i = sc->rl_ldata.rl_tx_considx; - for (n = 0; nic_i != sc->rl_ldata.rl_tx_prodidx; - n++, nic_i = RL_TX_DESC_NXT(sc, nic_i)) { - uint32_t cmdstat = - le32toh(sc->rl_ldata.rl_tx_list[nic_i].rl_cmdstat); - if (cmdstat & RL_TDESC_STAT_OWN) - break; - } - if (n > 0) { - sc->rl_ldata.rl_tx_considx = nic_i; - sc->rl_ldata.rl_tx_free += n; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - } - - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - */ -static int -re_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct rl_softc *sc = ifp->if_softc; - struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc; - - if (head > lim) - return netmap_ring_reinit(kring); - - bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, - sc->rl_ldata.rl_rx_list_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* - * First part: import newly received packets. - * - * This device uses all the buffers in the ring, so we need - * another termination condition in addition to RL_RDESC_STAT_OWN - * cleared (all buffers could have it cleared). The easiest one - * is to stop right before nm_hwcur. - */ - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - uint32_t stop_i = nm_prev(kring->nr_hwcur, lim); - - nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */ - nm_i = netmap_idx_n2k(kring, nic_i); - - while (nm_i != stop_i) { - struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[nic_i]; - uint32_t rxstat = le32toh(cur_rx->rl_cmdstat); - uint32_t total_len; - - if ((rxstat & RL_RDESC_STAT_OWN) != 0) - break; - total_len = rxstat & sc->rl_rxlenmask; - /* XXX subtract crc */ - total_len = (total_len < 4) ? 0 : total_len - 4; - ring->slot[nm_i].len = total_len; - ring->slot[nm_i].flags = slot_flags; - /* sync was in re_newbuf() */ - bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, - rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD); - // sc->rl_ifp->if_ipackets++; - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - sc->rl_ldata.rl_rx_prodidx = nic_i; - kring->nr_hwtail = nm_i; - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. 
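As the comment above notes, this NIC may hand back every buffer with OWN cleared, so the import loop cannot terminate on the OWN bit alone and instead stops one slot before nr_hwcur. A minimal sketch of the nm_next/nm_prev wrap-around helpers (plain re-implementations here; the real ones live in the netmap headers) and of that stop index:

#include <stdio.h>

/* Wrap-around helpers over a ring of lim+1 slots (sketch of nm_next/nm_prev). */
static unsigned nm_next(unsigned i, unsigned lim) { return i == lim ? 0 : i + 1; }
static unsigned nm_prev(unsigned i, unsigned lim) { return i == 0 ? lim : i - 1; }

int
main(void)
{
	unsigned lim = 255;		/* nkr_num_slots - 1 */
	unsigned nr_hwcur = 17;		/* first slot still owned by userspace */

	/* The rx import loop may fill at most up to one slot before nr_hwcur. */
	unsigned stop_i = nm_prev(nr_hwcur, lim);

	printf("stop importing at slot %u (hwcur %u)\n", stop_i, nr_hwcur);
	printf("next after lim wraps to %u\n", nm_next(lim, lim));
	return 0;
}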
- */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[nic_i]; - int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN; - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (nic_i == lim) /* mark end of ring */ - cmd |= RL_RDESC_CMD_EOR; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); - desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); - netmap_reload_map(sc->rl_ldata.rl_rx_mtag, - rxd[nic_i].rx_dmamap, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - desc->rl_cmdstat = htole32(cmd); - bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, - rxd[nic_i].rx_dmamap, - BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, - sc->rl_ldata.rl_rx_list_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * Additional routines to init the tx and rx rings. - * In other drivers we do that inline in the main code. - */ -static void -re_netmap_tx_init(struct rl_softc *sc) -{ - struct rl_txdesc *txd; - struct rl_desc *desc; - int i, n; - struct netmap_adapter *na = NA(sc->rl_ifp); - struct netmap_slot *slot; - - if (!na || !(na->na_flags & NAF_NATIVE_ON)) { - return; - } - - slot = netmap_reset(na, NR_TX, 0, 0); - /* slot is NULL if we are not in netmap mode */ - if (!slot) - return; // XXX cannot happen - /* in netmap mode, overwrite addresses and maps */ - txd = sc->rl_ldata.rl_tx_desc; - desc = sc->rl_ldata.rl_tx_list; - n = sc->rl_ldata.rl_tx_desc_cnt; - - /* l points in the netmap ring, i points in the NIC ring */ - for (i = 0; i < n; i++) { - uint64_t paddr; - int l = netmap_idx_n2k(&na->tx_rings[0], i); - void *addr = PNMB(slot + l, &paddr); - - desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); - desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); - netmap_load_map(sc->rl_ldata.rl_tx_mtag, - txd[i].tx_dmamap, addr); - } -} - -static void -re_netmap_rx_init(struct rl_softc *sc) -{ - struct netmap_adapter *na = NA(sc->rl_ifp); - struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0); - struct rl_desc *desc = sc->rl_ldata.rl_rx_list; - uint32_t cmdstat; - uint32_t nic_i, max_avail; - uint32_t const n = sc->rl_ldata.rl_rx_desc_cnt; - - if (!slot) - return; - /* - * Do not release the slots owned by userspace, - * and also keep one empty. 
- */ - max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]); - for (nic_i = 0; nic_i < n; nic_i++) { - void *addr; - uint64_t paddr; - uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i); - - addr = PNMB(slot + nm_i, &paddr); - - netmap_reload_map(sc->rl_ldata.rl_rx_mtag, - sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, addr); - bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, - sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, BUS_DMASYNC_PREREAD); - desc[nic_i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); - desc[nic_i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); - cmdstat = NETMAP_BUF_SIZE; - if (nic_i == n - 1) /* mark the end of ring */ - cmdstat |= RL_RDESC_CMD_EOR; - if (nic_i < max_avail) - cmdstat |= RL_RDESC_CMD_OWN; - desc[nic_i].rl_cmdstat = htole32(cmdstat); - } -} - - -static void -re_netmap_attach(struct rl_softc *sc) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = sc->rl_ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = sc->rl_ldata.rl_tx_desc_cnt; - na.num_rx_desc = sc->rl_ldata.rl_rx_desc_cnt; - na.nm_txsync = re_netmap_txsync; - na.nm_rxsync = re_netmap_rxsync; - na.nm_register = re_netmap_reg; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/sys/dev/netmap/ixgbe_netmap.h b/netmap/sys/dev/netmap/ixgbe_netmap.h deleted file mode 100644 index a2d15b8..0000000 --- a/netmap/sys/dev/netmap/ixgbe_netmap.h +++ /dev/null @@ -1,495 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/dev/netmap/ixgbe_netmap.h 244514 2012-12-20 22:26:03Z luigi $ - * - * netmap support for: ixgbe - * - * This file is meant to be a reference on how to implement - * netmap support for a network driver. - * This file contains code but only static or inline functions used - * by a single driver. To avoid replication of code we just #include - * it near the beginning of the standard driver. - */ - - -#include -#include -/* - * Some drivers may need the following headers. Others - * already include them by default - -#include -#include - - */ -#include - - -/* - * device-specific sysctl variables: - * - * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. 
- * During regular operations the CRC is stripped, but on some - * hardware reception of frames not multiple of 64 is slower, - * so using crcstrip=0 helps in benchmarks. - * - * ix_rx_miss, ix_rx_miss_bufs: - * count packets that might be missed due to lost interrupts. - */ -SYSCTL_DECL(_dev_netmap); -static int ix_rx_miss, ix_rx_miss_bufs, ix_crcstrip; -SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip, - CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames"); -SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss, - CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr"); -SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs, - CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs"); - - -static void -set_crcstrip(struct ixgbe_hw *hw, int onoff) -{ - /* crc stripping is set in two places: - * IXGBE_HLREG0 (modified on init_locked and hw reset) - * IXGBE_RDRXCTL (set by the original driver in - * ixgbe_setup_hw_rsc() called in init_locked. - * We disable the setting when netmap is compiled in). - * We update the values here, but also in ixgbe.c because - * init_locked sometimes is called outside our control. - */ - uint32_t hl, rxc; - - hl = IXGBE_READ_REG(hw, IXGBE_HLREG0); - rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - if (netmap_verbose) - D("%s read HLREG 0x%x rxc 0x%x", - onoff ? "enter" : "exit", hl, rxc); - /* hw requirements ... */ - rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; - rxc |= IXGBE_RDRXCTL_RSCACKC; - if (onoff && !ix_crcstrip) { - /* keep the crc. Fast rx */ - hl &= ~IXGBE_HLREG0_RXCRCSTRP; - rxc &= ~IXGBE_RDRXCTL_CRCSTRIP; - } else { - /* reset default mode */ - hl |= IXGBE_HLREG0_RXCRCSTRP; - rxc |= IXGBE_RDRXCTL_CRCSTRIP; - } - if (netmap_verbose) - D("%s write HLREG 0x%x rxc 0x%x", - onoff ? "enter" : "exit", hl, rxc); - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl); - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc); -} - - -/* - * Register/unregister. We are already under netmap lock. - * Only called on the first register or the last unregister. - */ -static int -ixgbe_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct adapter *adapter = ifp->if_softc; - - IXGBE_CORE_LOCK(adapter); - ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ? - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - - set_crcstrip(&adapter->hw, onoff); - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - ixgbe_init_locked(adapter); /* also enables intr */ - set_crcstrip(&adapter->hw, onoff); // XXX why twice ? - IXGBE_CORE_UNLOCK(adapter); - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - * - * All information is in the kring. - * Userspace wants to send packets up to the one before kring->rhead, - * kernel knows kring->nr_hwcur is the first unsent packet. - * - * Here we push packets out (as many as possible), and possibly - * reclaim buffers from previously completed transmission. - * - * The caller (netmap) guarantees that there is only one instance - * running at any time. Any interference with other driver - * methods should be handled by the individual drivers. 
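Per the contract described above, a txsync pushes out the slots from kring->nr_hwcur up to, but not including, kring->rhead; the amount of work is the circular distance between the two indices. A tiny sketch of that arithmetic (the driver itself simply walks with nm_next() until it reaches head):

#include <stdio.h>

/* Circular distance from hwcur to head on a ring of 'num_slots' slots:
 * this is how many slots a txsync call has to push to the NIC. */
static unsigned
tx_pending(unsigned hwcur, unsigned head, unsigned num_slots)
{
	return (head >= hwcur) ? head - hwcur : head + num_slots - hwcur;
}

int
main(void)
{
	printf("%u\n", tx_pending(10, 14, 256));	/* 4 slots to send */
	printf("%u\n", tx_pending(250, 4, 256));	/* 10 slots, wrapping */
	return 0;
}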
- */ -static int -ixgbe_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* - * interrupts on every tx packet are expensive so request - * them every half ring, or where NS_REPORT is set - */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = &adapter->tx_rings[kring->ring_id]; - int reclaim_tx; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - /* - * First part: process new packets to send. - * nm_i is the current index in the netmap ring, - * nic_i is the corresponding index in the NIC ring. - * The two numbers differ because upon a *_init() we reset - * the NIC ring but leave the netmap ring unchanged. - * For the transmit ring, we have - * - * nm_i = kring->nr_hwcur - * nic_i = IXGBE_TDT (not tracked in the driver) - * and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - * - * In this driver kring->nkr_hwofs >= 0, but for other - * drivers it might be negative as well. - */ - - /* - * If we have packets to send (kring->nr_hwcur != kring->rhead) - * iterate over the netmap ring, fetch length and update - * the corresponding slot in the NIC ring. Some drivers also - * need to update the buffer's physical address in the NIC slot - * even NS_BUF_CHANGED is not set (PNMB computes the addresses). - * - * The netmap_reload_map() calls is especially expensive, - * even when (as in this case) the tag is 0, so do only - * when the buffer has actually changed. - * - * If possible do not set the report/intr bit on all slots, - * but only a few times per ring or when NS_REPORT is set. - * - * Finally, on 10G and faster drivers, it might be useful - * to prefetch the next slot and txr entry. - */ - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - - __builtin_prefetch(&ring->slot[nm_i]); - __builtin_prefetch(&txr->tx_buffers[nic_i]); - - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - /* device-specific */ - union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i]; - struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i]; - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? - IXGBE_TXD_CMD_RS : 0; - - /* prefetch for next round */ - __builtin_prefetch(&ring->slot[nm_i + 1]); - __builtin_prefetch(&txr->tx_buffers[nic_i + 1]); - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(txr->txtag, txbuf->map, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - /* Use legacy descriptor, they are faster? 
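The relation quoted above, nm_i == (nic_i + kring->nkr_hwofs) % ring_size, is what netmap_idx_n2k() and netmap_idx_k2n() compute; since nkr_hwofs can be negative on some drivers, the result has to be normalized into [0, ring_size). A standalone sketch of the two translations:

#include <stdio.h>

/* NIC index -> netmap index (sketch of netmap_idx_n2k). */
static unsigned
idx_n2k(int nic_i, int hwofs, int ring_size)
{
	int i = nic_i + hwofs;
	/* hwofs can be negative, so normalize into [0, ring_size). */
	return (unsigned)((i % ring_size + ring_size) % ring_size);
}

/* netmap index -> NIC index (sketch of netmap_idx_k2n). */
static unsigned
idx_k2n(int nm_i, int hwofs, int ring_size)
{
	int i = nm_i - hwofs;
	return (unsigned)((i % ring_size + ring_size) % ring_size);
}

int
main(void)
{
	int ring_size = 512, hwofs = 3;

	printf("nic 0 -> nm %u\n", idx_n2k(0, hwofs, ring_size));
	printf("nm 0 -> nic %u\n", idx_k2n(0, hwofs, ring_size));
	return 0;
}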
*/ - curr->read.buffer_addr = htole64(paddr); - curr->read.olinfo_status = 0; - curr->read.cmd_type_len = htole32(len | flags | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP); - - /* make sure changes to the buffer are synced */ - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_PREWRITE); - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - /* synchronize the NIC ring */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* (re)start the tx unit up to slot nic_i (excluded) */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), nic_i); - } - - /* - * Second part: reclaim buffers for completed transmissions. - * Because this is expensive (we read a NIC register etc.) - * we only do it in specific cases (see below). - */ - if (flags & NAF_FORCE_RECLAIM) { - reclaim_tx = 1; /* forced reclaim */ - } else if (!nm_kr_txempty(kring)) { - reclaim_tx = 0; /* have buffers, no reclaim */ - } else { - /* - * No buffers available. Locate previous slot with - * REPORT_STATUS set. - * If the slot has DD set, we can reclaim space, - * otherwise wait for the next interrupt. - * This enables interrupt moderation on the tx - * side though it might reduce throughput. - */ - struct ixgbe_legacy_tx_desc *txd = - (struct ixgbe_legacy_tx_desc *)txr->tx_base; - - nic_i = txr->next_to_clean + report_frequency; - if (nic_i > lim) - nic_i -= lim + 1; - // round to the closest with dd set - nic_i = (nic_i < kring->nkr_num_slots / 4 || - nic_i >= kring->nkr_num_slots*3/4) ? - 0 : report_frequency; - reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ? - } - if (reclaim_tx) { - /* - * Record completed transmissions. - * We (re)use the driver's txr->next_to_clean to keep - * track of the most recently completed transmission. - * - * The datasheet discourages the use of TDH to find - * out the number of sent packets, but we only set - * REPORT_STATUS in a few slots so TDH is the only - * good way. - */ - nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id)); - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - if (nic_i != txr->next_to_clean) { - /* some tx completed, increment avail */ - txr->next_to_clean = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - } - - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - * Same as for the txsync, this routine must be efficient. - * The caller guarantees a single invocations, but races against - * the rest of the driver should be handled here. - * - * On call, kring->rhead is the first packet that userspace wants - * to keep, and kring->rcur is the wakeup point. - * The kernel has previously reported packets up to kring->rtail. - * - * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective - * of whether or not we received an interrupt. 
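The reclaim path above only works because the fill loop requests a status write-back (the RS bit) on a few descriptors per ring, roughly every half ring or wherever the application set NS_REPORT; on the remaining slots DD is never set and the driver falls back to reading TDH. A small sketch of that report policy:

#include <stdio.h>

/* Should the RS (report status) bit be set on this descriptor?
 * Sketch of the policy in the fill loop above: on NS_REPORT, on slot 0
 * and on the slot at 'report_frequency' (half the ring). */
static int
want_report(unsigned nic_i, unsigned report_frequency, int ns_report)
{
	return ns_report || nic_i == 0 || nic_i == report_frequency;
}

int
main(void)
{
	unsigned num_slots = 512, report_frequency = num_slots >> 1;
	unsigned i, reports = 0;

	for (i = 0; i < num_slots; i++)
		reports += want_report(i, report_frequency, 0);
	printf("%u report(s) per ring pass\n", reports);	/* 2 */
	return 0;
}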
- */ -static int -ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id]; - - if (head > lim) - return netmap_ring_reinit(kring); - - /* XXX check sync modes */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* - * First part: import newly received packets. - * - * nm_i is the index of the next free slot in the netmap ring, - * nic_i is the index of the next received packet in the NIC ring, - * and they may differ in case if_init() has been called while - * in netmap mode. For the receive ring we have - * - * nic_i = rxr->next_to_check; - * nm_i = kring->nr_hwtail (previous) - * and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - * - * rxr->next_to_check is set to 0 on a ring reinit - */ - if (netmap_no_pendintr || force_update) { - int crclen = ix_crcstrip ? 0 : 4; - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail) - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { - union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i]; - uint32_t staterr = le32toh(curr->wb.upper.status_error); - - if ((staterr & IXGBE_RXD_STAT_DD) == 0) - break; - ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen; - ring->slot[nm_i].flags = slot_flags; - bus_dmamap_sync(rxr->ptag, - rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - if (netmap_no_pendintr && !force_update) { - /* diagnostics */ - ix_rx_miss ++; - ix_rx_miss_bufs += n; - } - rxr->next_to_check = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - * (kring->nr_hwcur to kring->rhead excluded), - * and make the buffers available for reception. 
- * As usual nm_i is the index in the netmap ring, - * nic_i is the index in the NIC ring, and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(slot, &paddr); - - union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i]; - struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i]; - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(rxr->ptag, rxbuf->pmap, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->wb.upper.status_error = 0; - curr->read.pkt_addr = htole64(paddr); - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i); - } - - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * The attach routine, called near the end of ixgbe_attach(), - * fills the parameters for netmap_attach() and calls it. - * It cannot fail, in the worst case (such as no memory) - * netmap mode will be disabled and the driver will only - * operate in standard mode. - */ -static void -ixgbe_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = ixgbe_netmap_txsync; - na.nm_rxsync = ixgbe_netmap_rxsync; - na.nm_register = ixgbe_netmap_reg; - na.num_tx_rings = na.num_rx_rings = adapter->num_queues; - netmap_attach(&na); -} - -/* end of file */ diff --git a/netmap/sys/dev/netmap/netmap.c b/netmap/sys/dev/netmap/netmap.c deleted file mode 100644 index 8a244ba..0000000 --- a/netmap/sys/dev/netmap/netmap.c +++ /dev/null @@ -1,2616 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -/* - * $FreeBSD$ - * - * This module supports memory mapped access to network devices, - * see netmap(4). - * - * The module uses a large, memory pool allocated by the kernel - * and accessible as mmapped memory by multiple userspace threads/processes. - * The memory pool contains packet buffers and "netmap rings", - * i.e. user-accessible copies of the interface's queues. - * - * Access to the network card works like this: - * 1. a process/thread issues one or more open() on /dev/netmap, to create - * select()able file descriptor on which events are reported. - * 2. on each descriptor, the process issues an ioctl() to identify - * the interface that should report events to the file descriptor. - * 3. on each descriptor, the process issues an mmap() request to - * map the shared memory region within the process' address space. - * The list of interesting queues is indicated by a location in - * the shared memory region. - * 4. using the functions in the netmap(4) userspace API, a process - * can look up the occupation state of a queue, access memory buffers, - * and retrieve received packets or enqueue packets to transmit. - * 5. using some ioctl()s the process can synchronize the userspace view - * of the queue with the actual status in the kernel. This includes both - * receiving the notification of new packets, and transmitting new - * packets on the output interface. - * 6. select() or poll() can be used to wait for events on individual - * transmit or receive queues (or all queues for a given interface). - * - - SYNCHRONIZATION (USER) - -The netmap rings and data structures may be shared among multiple -user threads or even independent processes. -Any synchronization among those threads/processes is delegated -to the threads themselves. Only one thread at a time can be in -a system call on the same netmap ring. The OS does not enforce -this and only guarantees against system crashes in case of -invalid usage. - - LOCKING (INTERNAL) - -Within the kernel, access to the netmap rings is protected as follows: - -- a spinlock on each ring, to handle producer/consumer races on - RX rings attached to the host stack (against multiple host - threads writing from the host stack to the same ring), - and on 'destination' rings attached to a VALE switch - (i.e. RX rings in VALE ports, and TX rings in NIC/host ports) - protecting multiple active senders for the same destination) - -- an atomic variable to guarantee that there is at most one - instance of *_*xsync() on the ring at any time. - For rings connected to user file - descriptors, an atomic_test_and_set() protects this, and the - lock on the ring is not actually used. - For NIC RX rings connected to a VALE switch, an atomic_test_and_set() - is also used to prevent multiple executions (the driver might indeed - already guarantee this). - For NIC TX rings connected to a VALE switch, the lock arbitrates - access to the queue (both when allocating buffers and when pushing - them out). 
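The six steps listed above map directly onto the legacy userspace API of this era (struct nmreq plus the NIOC* ioctls). A hedged sketch of a minimal receiver follows; the interface name "em0" and the 1-second timeout are examples, and error handling is reduced to early returns:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

int
main(void)
{
	struct nmreq req;
	struct netmap_if *nifp;
	struct netmap_ring *rxring;
	struct pollfd pfd;
	void *mem;
	int fd;

	/* 1. open the control device */
	fd = open("/dev/netmap", O_RDWR);
	if (fd < 0)
		return 1;

	/* 2. bind the descriptor to an interface ("em0" is an example) */
	memset(&req, 0, sizeof(req));
	strncpy(req.nr_name, "em0", sizeof(req.nr_name) - 1);
	req.nr_version = NETMAP_API;
	if (ioctl(fd, NIOCREGIF, &req) < 0)
		return 1;

	/* 3. map the shared memory region and locate ring 0 */
	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED)
		return 1;
	nifp = NETMAP_IF(mem, req.nr_offset);
	rxring = NETMAP_RXRING(nifp, 0);

	/* 6. wait for packets, then 4./5. consume them and resync */
	pfd.fd = fd;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, 1000) > 0) {
		while (!nm_ring_empty(rxring)) {
			struct netmap_slot *slot = &rxring->slot[rxring->cur];
			char *buf = NETMAP_BUF(rxring, slot->buf_idx);

			printf("got %u bytes at %p\n",
			    (unsigned)slot->len, (void *)buf);
			rxring->head = rxring->cur =
			    nm_ring_next(rxring, rxring->cur);
		}
		ioctl(fd, NIOCRXSYNC, NULL);	/* tell the kernel we are done */
	}

	munmap(mem, req.nr_memsize);
	close(fd);
	return 0;
}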
- -- *xsync() should be protected against initializations of the card. - On FreeBSD most devices have the reset routine protected by - a RING lock (ixgbe, igb, em) or core lock (re). lem is missing - the RING protection on rx_reset(), this should be added. - - On linux there is an external lock on the tx path, which probably - also arbitrates access to the reset routine. XXX to be revised - -- a per-interface core_lock protecting access from the host stack - while interfaces may be detached from netmap mode. - XXX there should be no need for this lock if we detach the interfaces - only while they are down. - - ---- VALE SWITCH --- - -NMG_LOCK() serializes all modifications to switches and ports. -A switch cannot be deleted until all ports are gone. - -For each switch, an SX lock (RWlock on linux) protects -deletion of ports. When configuring or deleting a new port, the -lock is acquired in exclusive mode (after holding NMG_LOCK). -When forwarding, the lock is acquired in shared mode (without NMG_LOCK). -The lock is held throughout the entire forwarding cycle, -during which the thread may incur in a page fault. -Hence it is important that sleepable shared locks are used. - -On the rx ring, the per-port lock is grabbed initially to reserve -a number of slot in the ring, then the lock is released, -packets are copied from source to destination, and then -the lock is acquired again and the receive ring is updated. -(A similar thing is done on the tx ring for NIC and host stack -ports attached to the switch) - - */ - -/* - * OS-specific code that is used only within this file. - * Other OS-specific code that must be accessed by drivers - * is present in netmap_kern.h - */ - -#if defined(__FreeBSD__) -#include /* prerequisite */ -#include -#include -#include /* defines used in kernel.h */ -#include /* types used in module initialization */ -#include /* cdevsw struct, UID, GID */ -#include /* FIONBIO */ -#include -#include /* struct socket */ -#include -#include -#include -#include /* sockaddrs */ -#include -#include -#include -#include -#include -#include -#include /* BIOCIMMEDIATE */ -#include /* bus_dmamap_* */ -#include -#include - - -/* reduce conditional code */ -// linux API, use for the knlist in FreeBSD -#define init_waitqueue_head(x) knlist_init_mtx(&(x)->si_note, NULL) - -void freebsd_selwakeup(struct selinfo *si, int pri); -#define OS_selwakeup(a, b) freebsd_selwakeup(a, b) - -#elif defined(linux) - -#include "bsd_glue.h" - - - -#elif defined(__APPLE__) - -#warning OSX support is only partial -#include "osx_glue.h" - -#else - -#error Unsupported platform - -#endif /* unsupported */ - -/* - * common headers - */ -#include -#include -#include - - -MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map"); - -/* - * The following variables are used by the drivers and replicate - * fields in the global memory pool. They only refer to buffers - * used by physical interfaces. 
- */ -u_int netmap_total_buffers; -u_int netmap_buf_size; -char *netmap_buffer_base; /* also address of an invalid buffer */ - -/* user-controlled variables */ -int netmap_verbose; - -static int netmap_no_timestamp; /* don't timestamp on rxsync */ - -SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args"); -SYSCTL_INT(_dev_netmap, OID_AUTO, verbose, - CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode"); -SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp, - CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp"); -int netmap_mitigate = 1; -SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, ""); -int netmap_no_pendintr = 1; -SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, - CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets."); -int netmap_txsync_retry = 2; -SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW, - &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush."); - -int netmap_flags = 0; /* debug flags */ -int netmap_fwd = 0; /* force transparent mode */ -int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */ - -/* - * netmap_admode selects the netmap mode to use. - * Invalid values are reset to NETMAP_ADMODE_BEST - */ -enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */ - NETMAP_ADMODE_NATIVE, /* either native or none */ - NETMAP_ADMODE_GENERIC, /* force generic */ - NETMAP_ADMODE_LAST }; -static int netmap_admode = NETMAP_ADMODE_BEST; - -int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */ -int netmap_generic_ringsize = 1024; /* Generic ringsize. */ -int netmap_generic_rings = 1; /* number of queues in generic. */ - -SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , ""); - -NMG_LOCK_T netmap_global_lock; - - -static void -nm_kr_get(struct netmap_kring *kr) -{ - while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)) - tsleep(kr, 0, "NM_KR_GET", 4); -} - - -/* - * mark the ring as stopped, and run through the locks - * to make sure other users get to see it. 
- */ -void -netmap_disable_ring(struct netmap_kring *kr) -{ - kr->nkr_stopped = 1; - nm_kr_get(kr); - mtx_lock(&kr->q_lock); - mtx_unlock(&kr->q_lock); - nm_kr_put(kr); -} - - -static void -netmap_set_all_rings(struct ifnet *ifp, int stopped) -{ - struct netmap_adapter *na; - int i; - u_int ntx, nrx; - - if (!(ifp->if_capenable & IFCAP_NETMAP)) - return; - - na = NA(ifp); - - ntx = netmap_real_tx_rings(na); - nrx = netmap_real_rx_rings(na); - - for (i = 0; i < ntx; i++) { - if (stopped) - netmap_disable_ring(na->tx_rings + i); - else - na->tx_rings[i].nkr_stopped = 0; - na->nm_notify(na, i, NR_TX, NAF_DISABLE_NOTIFY); - } - - for (i = 0; i < nrx; i++) { - if (stopped) - netmap_disable_ring(na->rx_rings + i); - else - na->rx_rings[i].nkr_stopped = 0; - na->nm_notify(na, i, NR_RX, NAF_DISABLE_NOTIFY); - } -} - - -void -netmap_disable_all_rings(struct ifnet *ifp) -{ - netmap_set_all_rings(ifp, 1 /* stopped */); -} - - -void -netmap_enable_all_rings(struct ifnet *ifp) -{ - netmap_set_all_rings(ifp, 0 /* enabled */); -} - - -/* - * generic bound_checking function - */ -u_int -nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg) -{ - u_int oldv = *v; - const char *op = NULL; - - if (dflt < lo) - dflt = lo; - if (dflt > hi) - dflt = hi; - if (oldv < lo) { - *v = dflt; - op = "Bump"; - } else if (oldv > hi) { - *v = hi; - op = "Clamp"; - } - if (op && msg) - printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); - return *v; -} - - -/* - * packet-dump function, user-supplied or static buffer. - * The destination buffer must be at least 30+4*len - */ -const char * -nm_dump_buf(char *p, int len, int lim, char *dst) -{ - static char _dst[8192]; - int i, j, i0; - static char hex[] ="0123456789abcdef"; - char *o; /* output position */ - -#define P_HI(x) hex[((x) & 0xf0)>>4] -#define P_LO(x) hex[((x) & 0xf)] -#define P_C(x) ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.') - if (!dst) - dst = _dst; - if (lim <= 0 || lim > len) - lim = len; - o = dst; - sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim); - o += strlen(o); - /* hexdump routine */ - for (i = 0; i < lim; ) { - sprintf(o, "%5d: ", i); - o += strlen(o); - memset(o, ' ', 48); - i0 = i; - for (j=0; j < 16 && i < lim; i++, j++) { - o[j*3] = P_HI(p[i]); - o[j*3+1] = P_LO(p[i]); - } - i = i0; - for (j=0; j < 16 && i < lim; i++, j++) - o[j + 48] = P_C(p[i]); - o[j+48] = '\n'; - o += j+49; - } - *o = '\0'; -#undef P_HI -#undef P_LO -#undef P_C - return dst; -} - - -/* - * Fetch configuration from the device, to cope with dynamic - * reconfigurations after loading the module. 
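nm_bound_var() is how the tunables above are sanitized: values below the lower bound are bumped to the (itself clamped) default, values above the upper bound are clamped, and a message is printed either way. A standalone restatement of that behaviour:

#include <stdio.h>

/* Standalone re-implementation of nm_bound_var(): values below 'lo'
 * are bumped to the (clamped) default, values above 'hi' are clamped. */
static unsigned
bound_var(unsigned *v, unsigned dflt, unsigned lo, unsigned hi, const char *msg)
{
	unsigned oldv = *v;

	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (oldv < lo) {
		*v = dflt;
		printf("Bump %s to %u (was %u)\n", msg, *v, oldv);
	} else if (oldv > hi) {
		*v = hi;
		printf("Clamp %s to %u (was %u)\n", msg, *v, oldv);
	}
	return *v;
}

int
main(void)
{
	unsigned ringsize = 33000;	/* hypothetical out-of-range tunable */

	bound_var(&ringsize, 1024, 64, 16384, "generic_ringsize");
	printf("ringsize is now %u\n", ringsize);
	return 0;
}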
- */ -int -netmap_update_config(struct netmap_adapter *na) -{ - struct ifnet *ifp = na->ifp; - u_int txr, txd, rxr, rxd; - - txr = txd = rxr = rxd = 0; - if (na->nm_config) { - na->nm_config(na, &txr, &txd, &rxr, &rxd); - } else { - /* take whatever we had at init time */ - txr = na->num_tx_rings; - txd = na->num_tx_desc; - rxr = na->num_rx_rings; - rxd = na->num_rx_desc; - } - - if (na->num_tx_rings == txr && na->num_tx_desc == txd && - na->num_rx_rings == rxr && na->num_rx_desc == rxd) - return 0; /* nothing changed */ - if (netmap_verbose || na->active_fds > 0) { - D("stored config %s: txring %d x %d, rxring %d x %d", - NM_IFPNAME(ifp), - na->num_tx_rings, na->num_tx_desc, - na->num_rx_rings, na->num_rx_desc); - D("new config %s: txring %d x %d, rxring %d x %d", - NM_IFPNAME(ifp), txr, txd, rxr, rxd); - } - if (na->active_fds == 0) { - D("configuration changed (but fine)"); - na->num_tx_rings = txr; - na->num_tx_desc = txd; - na->num_rx_rings = rxr; - na->num_rx_desc = rxd; - return 0; - } - D("configuration changed while active, this is bad..."); - return 1; -} - -static int -netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags) -{ - (void)flags; - netmap_txsync_to_host(kring->na); - return 0; -} - -static int -netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags) -{ - (void)flags; - netmap_rxsync_from_host(kring->na, NULL, NULL); - return 0; -} - - - -/* create the krings array and initialize the fields common to all adapters. - * The array layout is this: - * - * +----------+ - * na->tx_rings ----->| | \ - * | | } na->num_tx_ring - * | | / - * +----------+ - * | | host tx kring - * na->rx_rings ----> +----------+ - * | | \ - * | | } na->num_rx_rings - * | | / - * +----------+ - * | | host rx kring - * +----------+ - * na->tailroom ----->| | \ - * | | } tailroom bytes - * | | / - * +----------+ - * - * Note: for compatibility, host krings are created even when not needed. - * The tailroom space is currently used by vale ports for allocating leases. - */ -int -netmap_krings_create(struct netmap_adapter *na, u_int tailroom) -{ - u_int i, len, ndesc; - struct netmap_kring *kring; - u_int ntx, nrx; - - /* account for the (possibly fake) host rings */ - ntx = na->num_tx_rings + 1; - nrx = na->num_rx_rings + 1; - - len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom; - - na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO); - if (na->tx_rings == NULL) { - D("Cannot allocate krings"); - return ENOMEM; - } - na->rx_rings = na->tx_rings + ntx; - - /* - * All fields in krings are 0 except the one initialized below. - * but better be explicit on important kring fields. - */ - ndesc = na->num_tx_desc; - for (i = 0; i < ntx; i++) { /* Transmit rings */ - kring = &na->tx_rings[i]; - bzero(kring, sizeof(*kring)); - kring->na = na; - kring->ring_id = i; - kring->nkr_num_slots = ndesc; - if (i < na->num_tx_rings) { - kring->nm_sync = na->nm_txsync; - } else if (i == na->num_tx_rings) { - kring->nm_sync = netmap_txsync_to_host_compat; - } - /* - * IMPORTANT: Always keep one slot empty. 
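Given the layout pictured above, the hardware TX krings sit at indices 0..num_tx_rings-1 and the host TX kring immediately after them at index num_tx_rings (likewise for RX), with rx_rings starting right after the TX block in the same allocation; that is why the host-stack paths elsewhere in this file index the arrays with na->num_tx_rings and na->num_rx_rings. A toy illustration of that addressing, using stand-in types and hypothetical ring counts:

#include <stdio.h>
#include <stdlib.h>

struct toy_kring { char name[16]; };	/* stand-in for struct netmap_kring */

int
main(void)
{
	unsigned num_tx_rings = 4, num_rx_rings = 4;
	unsigned ntx = num_tx_rings + 1;	/* +1 for the host ring */
	unsigned nrx = num_rx_rings + 1;
	struct toy_kring *tx_rings, *rx_rings;

	/* one flat allocation, rx_rings starts right after the tx block */
	tx_rings = calloc(ntx + nrx, sizeof(*tx_rings));
	if (tx_rings == NULL)
		return 1;
	rx_rings = tx_rings + ntx;

	snprintf(tx_rings[0].name, sizeof(tx_rings[0].name), "em0 TX0");
	snprintf(tx_rings[num_tx_rings].name,
	    sizeof(tx_rings[0].name), "em0 TX host");
	snprintf(rx_rings[num_rx_rings].name,
	    sizeof(rx_rings[0].name), "em0 RX host");

	printf("host tx kring: %s\n", tx_rings[num_tx_rings].name);
	printf("host rx kring: %s\n", rx_rings[num_rx_rings].name);
	free(tx_rings);
	return 0;
}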
- */ - kring->rhead = kring->rcur = kring->nr_hwcur = 0; - kring->rtail = kring->nr_hwtail = ndesc - 1; - snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", NM_IFPNAME(na->ifp), i); - ND("ktx %s h %d c %d t %d", - kring->name, kring->rhead, kring->rcur, kring->rtail); - mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF); - init_waitqueue_head(&kring->si); - } - - ndesc = na->num_rx_desc; - for (i = 0; i < nrx; i++) { /* Receive rings */ - kring = &na->rx_rings[i]; - bzero(kring, sizeof(*kring)); - kring->na = na; - kring->ring_id = i; - kring->nkr_num_slots = ndesc; - if (i < na->num_rx_rings) { - kring->nm_sync = na->nm_rxsync; - } else if (i == na->num_rx_rings) { - kring->nm_sync = netmap_rxsync_from_host_compat; - } - kring->rhead = kring->rcur = kring->nr_hwcur = 0; - kring->rtail = kring->nr_hwtail = 0; - snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", NM_IFPNAME(na->ifp), i); - ND("krx %s h %d c %d t %d", - kring->name, kring->rhead, kring->rcur, kring->rtail); - mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF); - init_waitqueue_head(&kring->si); - } - init_waitqueue_head(&na->tx_si); - init_waitqueue_head(&na->rx_si); - - na->tailroom = na->rx_rings + nrx; - - return 0; -} - - -/* undo the actions performed by netmap_krings_create */ -void -netmap_krings_delete(struct netmap_adapter *na) -{ - struct netmap_kring *kring = na->tx_rings; - - /* we rely on the krings layout described above */ - for ( ; kring != na->tailroom; kring++) { - mtx_destroy(&kring->q_lock); - } - free(na->tx_rings, M_DEVBUF); - na->tx_rings = na->rx_rings = na->tailroom = NULL; -} - - -/* - * Destructor for NIC ports. They also have an mbuf queue - * on the rings connected to the host so we need to purge - * them first. - */ -static void -netmap_hw_krings_delete(struct netmap_adapter *na) -{ - struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue; - - ND("destroy sw mbq with len %d", mbq_len(q)); - mbq_purge(q); - mbq_safe_destroy(q); - netmap_krings_delete(na); -} - - -static struct netmap_if* -netmap_if_new(const char *ifname, struct netmap_adapter *na) -{ - struct netmap_if *nifp; - - if (netmap_update_config(na)) { - /* configuration mismatch, report and fail */ - return NULL; - } - - if (na->active_fds) - goto final; - - if (na->nm_krings_create(na)) - goto cleanup; - - if (netmap_mem_rings_create(na)) - goto cleanup; - -final: - - nifp = netmap_mem_if_new(ifname, na); - if (nifp == NULL) - goto cleanup; - - return (nifp); - -cleanup: - - if (na->active_fds == 0) { - netmap_mem_rings_delete(na); - na->nm_krings_delete(na); - } - - return NULL; -} - - -/* grab a reference to the memory allocator, if we don't have one already. The - * reference is taken from the netmap_adapter registered with the priv. - * - */ -static int -netmap_get_memory_locked(struct netmap_priv_d* p) -{ - struct netmap_mem_d *nmd; - int error = 0; - - if (p->np_na == NULL) { - if (!netmap_mmap_unreg) - return ENODEV; - /* for compatibility with older versions of the API - * we use the global allocator when no interface has been - * registered - */ - nmd = &nm_mem; - } else { - nmd = p->np_na->nm_mem; - } - if (p->np_mref == NULL) { - error = netmap_mem_finalize(nmd); - if (!error) - p->np_mref = nmd; - } else if (p->np_mref != nmd) { - /* a virtual port has been registered, but previous - * syscalls already used the global allocator. 
- * We cannot continue - */ - error = ENODEV; - } - return error; -} - - -int -netmap_get_memory(struct netmap_priv_d* p) -{ - int error; - NMG_LOCK(); - error = netmap_get_memory_locked(p); - NMG_UNLOCK(); - return error; -} - - -static int -netmap_have_memory_locked(struct netmap_priv_d* p) -{ - return p->np_mref != NULL; -} - - -static void -netmap_drop_memory_locked(struct netmap_priv_d* p) -{ - if (p->np_mref) { - netmap_mem_deref(p->np_mref); - p->np_mref = NULL; - } -} - - -/* - * File descriptor's private data destructor. - * - * Call nm_register(ifp,0) to stop netmap mode on the interface and - * revert to normal operation. We expect that np_na->ifp has not gone. - * The second argument is the nifp to work on. In some cases it is - * not attached yet to the netmap_priv_d so we need to pass it as - * a separate argument. - */ -/* call with NMG_LOCK held */ -static void -netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp) -{ - struct netmap_adapter *na = priv->np_na; - struct ifnet *ifp = na->ifp; - - NMG_LOCK_ASSERT(); - na->active_fds--; - if (na->active_fds <= 0) { /* last instance */ - - if (netmap_verbose) - D("deleting last instance for %s", NM_IFPNAME(ifp)); - /* - * (TO CHECK) This function is only called - * when the last reference to this file descriptor goes - * away. This means we cannot have any pending poll() - * or interrupt routine operating on the structure. - * XXX The file may be closed in a thread while - * another thread is using it. - * Linux keeps the file opened until the last reference - * by any outstanding ioctl/poll or mmap is gone. - * FreeBSD does not track mmap()s (but we do) and - * wakes up any sleeping poll(). Need to check what - * happens if the close() occurs while a concurrent - * syscall is running. - */ - if (ifp) - na->nm_register(na, 0); /* off, clear flags */ - /* Wake up any sleeping threads. netmap_poll will - * then return POLLERR - * XXX The wake up now must happen during *_down(), when - * we order all activities to stop. -gl - */ - /* XXX kqueue(9) needed; these will mirror knlist_init. */ - /* knlist_destroy(&na->tx_si.si_note); */ - /* knlist_destroy(&na->rx_si.si_note); */ - - /* delete rings and buffers */ - netmap_mem_rings_delete(na); - na->nm_krings_delete(na); - } - /* delete the nifp */ - netmap_mem_if_delete(na, nifp); -} - -static __inline int -nm_tx_si_user(struct netmap_priv_d *priv) -{ - return (priv->np_na != NULL && - (priv->np_txqlast - priv->np_txqfirst > 1)); -} - -static __inline int -nm_rx_si_user(struct netmap_priv_d *priv) -{ - return (priv->np_na != NULL && - (priv->np_rxqlast - priv->np_rxqfirst > 1)); -} - - -/* - * returns 1 if this is the last instance and we can free priv - */ -int -netmap_dtor_locked(struct netmap_priv_d *priv) -{ - struct netmap_adapter *na = priv->np_na; - -#ifdef __FreeBSD__ - /* - * np_refcount is the number of active mmaps on - * this file descriptor - */ - if (--priv->np_refcount > 0) { - return 0; - } -#endif /* __FreeBSD__ */ - if (!na) { - return 1; //XXX is it correct? 
- } - netmap_do_unregif(priv, priv->np_nifp); - priv->np_nifp = NULL; - netmap_drop_memory_locked(priv); - if (priv->np_na) { - if (nm_tx_si_user(priv)) - na->tx_si_users--; - if (nm_rx_si_user(priv)) - na->rx_si_users--; - netmap_adapter_put(na); - priv->np_na = NULL; - } - return 1; -} - - -void -netmap_dtor(void *data) -{ - struct netmap_priv_d *priv = data; - int last_instance; - - NMG_LOCK(); - last_instance = netmap_dtor_locked(priv); - NMG_UNLOCK(); - if (last_instance) { - bzero(priv, sizeof(*priv)); /* for safety */ - free(priv, M_DEVBUF); - } -} - - - - -/* - * Handlers for synchronization of the queues from/to the host. - * Netmap has two operating modes: - * - in the default mode, the rings connected to the host stack are - * just another ring pair managed by userspace; - * - in transparent mode (XXX to be defined) incoming packets - * (from the host or the NIC) are marked as NS_FORWARD upon - * arrival, and the user application has a chance to reset the - * flag for packets that should be dropped. - * On the RXSYNC or poll(), packets in RX rings between - * kring->nr_kcur and ring->cur with NS_FORWARD still set are moved - * to the other side. - * The transfer NIC --> host is relatively easy, just encapsulate - * into mbufs and we are done. The host --> NIC side is slightly - * harder because there might not be room in the tx ring so it - * might take a while before releasing the buffer. - */ - - -/* - * pass a chain of buffers to the host stack as coming from 'dst' - * We do not need to lock because the queue is private. - */ -static void -netmap_send_up(struct ifnet *dst, struct mbq *q) -{ - struct mbuf *m; - - /* send packets up, outside the lock */ - while ((m = mbq_dequeue(q)) != NULL) { - if (netmap_verbose & NM_VERB_HOST) - D("sending up pkt %p size %d", m, MBUF_LEN(m)); - NM_SEND_UP(dst, m); - } - mbq_destroy(q); -} - - -/* - * put a copy of the buffers marked NS_FORWARD into an mbuf chain. - * Take packets from hwcur to ring->head marked NS_FORWARD (or forced) - * and pass them up. Drop remaining packets in the unlikely event - * of an mbuf shortage. - */ -static void -netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force) -{ - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->ring->head; - u_int n; - struct netmap_adapter *na = kring->na; - - for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) { - struct mbuf *m; - struct netmap_slot *slot = &kring->ring->slot[n]; - - if ((slot->flags & NS_FORWARD) == 0 && !force) - continue; - if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { - RD(5, "bad pkt at %d len %d", n, slot->len); - continue; - } - slot->flags &= ~NS_FORWARD; // XXX needed ? 
- /* XXX TODO: adapt to the case of a multisegment packet */ - m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL); - - if (m == NULL) - break; - mbq_enqueue(q, m); - } -} - - -/* - * Send to the NIC rings packets marked NS_FORWARD between - * kring->nr_hwcur and kring->rhead - * Called under kring->rx_queue.lock on the sw rx ring, - */ -static u_int -netmap_sw_to_nic(struct netmap_adapter *na) -{ - struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; - struct netmap_slot *rxslot = kring->ring->slot; - u_int i, rxcur = kring->nr_hwcur; - u_int const head = kring->rhead; - u_int const src_lim = kring->nkr_num_slots - 1; - u_int sent = 0; - - /* scan rings to find space, then fill as much as possible */ - for (i = 0; i < na->num_tx_rings; i++) { - struct netmap_kring *kdst = &na->tx_rings[i]; - struct netmap_ring *rdst = kdst->ring; - u_int const dst_lim = kdst->nkr_num_slots - 1; - - /* XXX do we trust ring or kring->rcur,rtail ? */ - for (; rxcur != head && !nm_ring_empty(rdst); - rxcur = nm_next(rxcur, src_lim) ) { - struct netmap_slot *src, *dst, tmp; - u_int dst_cur = rdst->cur; - - src = &rxslot[rxcur]; - if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd) - continue; - - sent++; - - dst = &rdst->slot[dst_cur]; - - tmp = *src; - - src->buf_idx = dst->buf_idx; - src->flags = NS_BUF_CHANGED; - - dst->buf_idx = tmp.buf_idx; - dst->len = tmp.len; - dst->flags = NS_BUF_CHANGED; - - rdst->cur = nm_next(dst_cur, dst_lim); - } - /* if (sent) XXX txsync ? */ - } - return sent; -} - - -/* - * netmap_txsync_to_host() passes packets up. We are called from a - * system call in user process context, and the only contention - * can be among multiple user threads erroneously calling - * this routine concurrently. - */ -void -netmap_txsync_to_host(struct netmap_adapter *na) -{ - struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings]; - struct netmap_ring *ring = kring->ring; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - struct mbq q; - - /* Take packets from hwcur to head and pass them up. - * force head = cur since netmap_grab_packets() stops at head - * In case of no buffers we give up. At the end of the loop, - * the queue is drained in all cases. - */ - mbq_init(&q); - ring->cur = head; - netmap_grab_packets(kring, &q, 1 /* force */); - ND("have %d pkts in queue", mbq_len(&q)); - kring->nr_hwcur = head; - kring->nr_hwtail = head + lim; - if (kring->nr_hwtail > lim) - kring->nr_hwtail -= lim + 1; - nm_txsync_finalize(kring); - - netmap_send_up(na->ifp, &q); -} - - -/* - * rxsync backend for packets coming from the host stack. - * They have been put in kring->rx_queue by netmap_transmit(). - * We protect access to the kring using kring->rx_queue.lock - * - * This routine also does the selrecord if called from the poll handler - * (we know because td != NULL). - * - * NOTE: on linux, selrecord() is defined as a macro and uses pwait - * as an additional hidden argument. 
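With transparent mode enabled (the dev.netmap.fwd sysctl or the ring's NR_FORWARD flag), a userspace application opts packets into or out of forwarding simply by toggling NS_FORWARD on each slot before releasing it; netmap_grab_packets() and netmap_sw_to_nic() above then move the marked slots to the other side on the next sync. A hedged fragment showing that flag manipulation, assuming a host RX ring obtained as in the earlier open()/NIOCREGIF/mmap() sketch:

/* Assumes 'rxring' is the host RX netmap_ring obtained as in the earlier
 * userspace sketch, and that <net/netmap_user.h> has been included. */
static void
mark_for_forwarding(struct netmap_ring *rxring)
{
	unsigned cur = rxring->cur;

	while (cur != rxring->tail) {
		struct netmap_slot *slot = &rxring->slot[cur];

		if (slot->len >= 14)			/* plausible Ethernet frame */
			slot->flags |= NS_FORWARD;	/* forward it */
		else
			slot->flags &= ~NS_FORWARD;	/* drop runts */
		cur = nm_ring_next(rxring, cur);
	}
	/* releasing the slots lets the kernel forward the marked ones */
	rxring->head = rxring->cur = cur;
}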
- * returns the number of packets delivered to tx queues in - * transparent mode, or a negative value if error - */ -int -netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait) -{ - struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; - struct netmap_ring *ring = kring->ring; - u_int nm_i, n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - int ret = 0; - struct mbq *q = &kring->rx_queue; - - (void)pwait; /* disable unused warnings */ - (void)td; - - mtx_lock(&q->lock); - - /* First part: import newly received packets */ - n = mbq_len(q); - if (n) { /* grab packets from the queue */ - struct mbuf *m; - uint32_t stop_i; - - nm_i = kring->nr_hwtail; - stop_i = nm_prev(nm_i, lim); - while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) { - int len = MBUF_LEN(m); - struct netmap_slot *slot = &ring->slot[nm_i]; - - m_copydata(m, 0, len, BDG_NMB(na, slot)); - ND("nm %d len %d", nm_i, len); - if (netmap_verbose) - D("%s", nm_dump_buf(BDG_NMB(na, slot),len, 128, NULL)); - - slot->len = len; - slot->flags = kring->nkr_slot_flags; - nm_i = nm_next(nm_i, lim); - m_freem(m); - } - kring->nr_hwtail = nm_i; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* something was released */ - if (netmap_fwd || kring->ring->flags & NR_FORWARD) - ret = netmap_sw_to_nic(na); - kring->nr_hwcur = head; - } - - nm_rxsync_finalize(kring); - - /* access copies of cur,tail in the kring */ - if (kring->rcur == kring->rtail && td) /* no bufs available */ - selrecord(td, &kring->si); - - mtx_unlock(&q->lock); - return ret; -} - - -/* Get a netmap adapter for the port. - * - * If it is possible to satisfy the request, return 0 - * with *na containing the netmap adapter found. - * Otherwise return an error code, with *na containing NULL. - * - * When the port is attached to a bridge, we always return - * EBUSY. - * Otherwise, if the port is already bound to a file descriptor, - * then we unconditionally return the existing adapter into *na. - * In all the other cases, we return (into *na) either native, - * generic or NULL, according to the following table: - * - * native_support - * active_fds dev.netmap.admode YES NO - * ------------------------------------------------------- - * >0 * NA(ifp) NA(ifp) - * - * 0 NETMAP_ADMODE_BEST NATIVE GENERIC - * 0 NETMAP_ADMODE_NATIVE NATIVE NULL - * 0 NETMAP_ADMODE_GENERIC GENERIC GENERIC - * - */ - -int -netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na) -{ - /* generic support */ - int i = netmap_admode; /* Take a snapshot. */ - int error = 0; - struct netmap_adapter *prev_na; - struct netmap_generic_adapter *gna; - - *na = NULL; /* default */ - - /* reset in case of invalid value */ - if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST) - i = netmap_admode = NETMAP_ADMODE_BEST; - - if (NETMAP_CAPABLE(ifp)) { - /* If an adapter already exists, but is - * attached to a vale port, we report that the - * port is busy. - */ - if (NETMAP_OWNED_BY_KERN(NA(ifp))) - return EBUSY; - - /* If an adapter already exists, return it if - * there are active file descriptors or if - * netmap is not forced to use generic - * adapters. 
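Ignoring the pipe special case, the decision table above condenses to the following pseudocode for the active_fds == 0 rows (a restatement for clarity; native_support, NATIVE and GENERIC are placeholders, not identifiers from this file):

    /* adapter selection when no file descriptor is bound yet */
    if (native_support)
            return (admode == NETMAP_ADMODE_GENERIC) ? GENERIC : NATIVE;
    if (admode == NETMAP_ADMODE_NATIVE)
            return NULL;            /* no native support and generic not allowed */
    return GENERIC;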
- */ - if (NA(ifp)->active_fds > 0 - || i != NETMAP_ADMODE_GENERIC -#ifdef WITH_PIPES - /* ugly, but we cannot allow an adapter switch - * if some pipe is referring to this one - */ - || NA(ifp)->na_next_pipe > 0 -#endif - ) { - *na = NA(ifp); - return 0; - } - } - - /* If there isn't native support and netmap is not allowed - * to use generic adapters, we cannot satisfy the request. - */ - if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE) - return EOPNOTSUPP; - - /* Otherwise, create a generic adapter and return it, - * saving the previously used netmap adapter, if any. - * - * Note that here 'prev_na', if not NULL, MUST be a - * native adapter, and CANNOT be a generic one. This is - * true because generic adapters are created on demand, and - * destroyed when not used anymore. Therefore, if the adapter - * currently attached to an interface 'ifp' is generic, it - * must be that - * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))). - * Consequently, if NA(ifp) is generic, we will enter one of - * the branches above. This ensures that we never override - * a generic adapter with another generic adapter. - */ - prev_na = NA(ifp); - error = generic_netmap_attach(ifp); - if (error) - return error; - - *na = NA(ifp); - gna = (struct netmap_generic_adapter*)NA(ifp); - gna->prev = prev_na; /* save old na */ - if (prev_na != NULL) { - ifunit_ref(ifp->if_xname); - // XXX add a refcount ? - netmap_adapter_get(prev_na); - } - ND("Created generic NA %p (prev %p)", gna, gna->prev); - - return 0; -} - - -/* - * MUST BE CALLED UNDER NMG_LOCK() - * - * Get a refcounted reference to a netmap adapter attached - * to the interface specified by nmr. - * This is always called in the execution of an ioctl(). - * - * Return ENXIO if the interface specified by the request does - * not exist, ENOTSUP if netmap is not supported by the interface, - * EBUSY if the interface is already attached to a bridge, - * EINVAL if parameters are invalid, ENOMEM if needed resources - * could not be allocated. - * If successful, hold a reference to the netmap adapter. - * - * No reference is kept on the real interface, which may then - * disappear at any time. - */ -int -netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create) -{ - struct ifnet *ifp = NULL; - int error = 0; - struct netmap_adapter *ret = NULL; - - *na = NULL; /* default return value */ - - /* first try to see if this is a bridge port. */ - NMG_LOCK_ASSERT(); - - error = netmap_get_pipe_na(nmr, na, create); - if (error || *na != NULL) - return error; - - error = netmap_get_bdg_na(nmr, na, create); - if (error) - return error; - - if (*na != NULL) /* valid match in netmap_get_bdg_na() */ - goto pipes; - - ifp = ifunit_ref(nmr->nr_name); - if (ifp == NULL) { - return ENXIO; - } - - error = netmap_get_hw_na(ifp, &ret); - if (error) - goto out; - - /* Users cannot use the NIC attached to a bridge directly */ - if (NETMAP_OWNED_BY_KERN(ret)) { - error = EBUSY; - goto out; - } - *na = ret; - netmap_adapter_get(ret); - -pipes: - error = netmap_pipe_alloc(*na, nmr); - -out: - if (error && ret != NULL) - netmap_adapter_put(ret); - - if (ifp) - if_rele(ifp); - - return error; -} - - -/* - * validate parameters on entry for *_txsync() - * Returns ring->cur if ok, or something >= kring->nkr_num_slots - * in case of error. - * - * rhead, rcur and rtail=hwtail are stored from previous round. - * hwcur is the next packet to send to the ring. 
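From userspace the invariant above simply means that head and cur may only move forward, and never past tail. A typical transmit round that respects it looks roughly like this (fd and nifp come from the usual open/NIOCREGIF/mmap setup; have_data() and fill_frame() are placeholders):

    /* sketch: fill free tx slots, advance head/cur, then sync */
    struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
    uint32_t i = ring->cur;

    while (i != ring->tail && have_data()) {
            struct netmap_slot *slot = &ring->slot[i];

            slot->len = fill_frame(NETMAP_BUF(ring, slot->buf_idx));
            i = nm_ring_next(ring, i);
    }
    ring->head = ring->cur = i;           /* never beyond ring->tail */
    ioctl(fd, NIOCTXSYNC, NULL);          /* kernel runs the prologue below, then txsync */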
- * - * We want - * hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail - * - * hwcur, rhead, rtail and hwtail are reliable - */ -u_int -nm_txsync_prologue(struct netmap_kring *kring) -{ - struct netmap_ring *ring = kring->ring; - u_int head = ring->head; /* read only once */ - u_int cur = ring->cur; /* read only once */ - u_int n = kring->nkr_num_slots; - - ND(5, "%s kcur %d ktail %d head %d cur %d tail %d", - kring->name, - kring->nr_hwcur, kring->nr_hwtail, - ring->head, ring->cur, ring->tail); -#if 1 /* kernel sanity checks; but we can trust the kring. */ - if (kring->nr_hwcur >= n || kring->rhead >= n || - kring->rtail >= n || kring->nr_hwtail >= n) - goto error; -#endif /* kernel sanity checks */ - /* - * user sanity checks. We only use 'cur', - * A, B, ... are possible positions for cur: - * - * 0 A cur B tail C n-1 - * 0 D tail E cur F n-1 - * - * B, F, D are valid. A, C, E are wrong - */ - if (kring->rtail >= kring->rhead) { - /* want rhead <= head <= rtail */ - if (head < kring->rhead || head > kring->rtail) - goto error; - /* and also head <= cur <= rtail */ - if (cur < head || cur > kring->rtail) - goto error; - } else { /* here rtail < rhead */ - /* we need head outside rtail .. rhead */ - if (head > kring->rtail && head < kring->rhead) - goto error; - - /* two cases now: head <= rtail or head >= rhead */ - if (head <= kring->rtail) { - /* want head <= cur <= rtail */ - if (cur < head || cur > kring->rtail) - goto error; - } else { /* head >= rhead */ - /* cur must be outside rtail..head */ - if (cur > kring->rtail && cur < head) - goto error; - } - } - if (ring->tail != kring->rtail) { - RD(5, "tail overwritten was %d need %d", - ring->tail, kring->rtail); - ring->tail = kring->rtail; - } - kring->rhead = head; - kring->rcur = cur; - return head; - -error: - RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d", - kring->name, - kring->nr_hwcur, - kring->rcur, kring->nr_hwtail, - cur, ring->tail); - return n; -} - - -/* - * validate parameters on entry for *_rxsync() - * Returns ring->head if ok, kring->nkr_num_slots on error. - * - * For a valid configuration, - * hwcur <= head <= cur <= tail <= hwtail - * - * We only consider head and cur. - * hwcur and hwtail are reliable. - * - */ -u_int -nm_rxsync_prologue(struct netmap_kring *kring) -{ - struct netmap_ring *ring = kring->ring; - uint32_t const n = kring->nkr_num_slots; - uint32_t head, cur; - - ND("%s kc %d kt %d h %d c %d t %d", - kring->name, - kring->nr_hwcur, kring->nr_hwtail, - ring->head, ring->cur, ring->tail); - /* - * Before storing the new values, we should check they do not - * move backwards. 
However: - * - head is not an issue because the previous value is hwcur; - * - cur could in principle go back, however it does not matter - * because we are processing a brand new rxsync() - */ - cur = kring->rcur = ring->cur; /* read only once */ - head = kring->rhead = ring->head; /* read only once */ -#if 1 /* kernel sanity checks */ - if (kring->nr_hwcur >= n || kring->nr_hwtail >= n) - goto error; -#endif /* kernel sanity checks */ - /* user sanity checks */ - if (kring->nr_hwtail >= kring->nr_hwcur) { - /* want hwcur <= rhead <= hwtail */ - if (head < kring->nr_hwcur || head > kring->nr_hwtail) - goto error; - /* and also rhead <= rcur <= hwtail */ - if (cur < head || cur > kring->nr_hwtail) - goto error; - } else { - /* we need rhead outside hwtail..hwcur */ - if (head < kring->nr_hwcur && head > kring->nr_hwtail) - goto error; - /* two cases now: head <= hwtail or head >= hwcur */ - if (head <= kring->nr_hwtail) { - /* want head <= cur <= hwtail */ - if (cur < head || cur > kring->nr_hwtail) - goto error; - } else { - /* cur must be outside hwtail..head */ - if (cur < head && cur > kring->nr_hwtail) - goto error; - } - } - if (ring->tail != kring->rtail) { - RD(5, "%s tail overwritten was %d need %d", - kring->name, - ring->tail, kring->rtail); - ring->tail = kring->rtail; - } - return head; - -error: - RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d", - kring->nr_hwcur, - kring->rcur, kring->nr_hwtail, - kring->rhead, kring->rcur, ring->tail); - return n; -} - - -/* - * Error routine called when txsync/rxsync detects an error. - * Can't do much more than resetting head =cur = hwcur, tail = hwtail - * Return 1 on reinit. - * - * This routine is only called by the upper half of the kernel. - * It only reads hwcur (which is changed only by the upper half, too) - * and hwtail (which may be changed by the lower half, but only on - * a tx ring and only to increase it, so any error will be recovered - * on the next call). For the above, we don't strictly need to call - * it under lock. - */ -int -netmap_ring_reinit(struct netmap_kring *kring) -{ - struct netmap_ring *ring = kring->ring; - u_int i, lim = kring->nkr_num_slots - 1; - int errors = 0; - - // XXX KASSERT nm_kr_tryget - RD(10, "called for %s", NM_IFPNAME(kring->na->ifp)); - // XXX probably wrong to trust userspace - kring->rhead = ring->head; - kring->rcur = ring->cur; - kring->rtail = ring->tail; - - if (ring->cur > lim) - errors++; - if (ring->head > lim) - errors++; - if (ring->tail > lim) - errors++; - for (i = 0; i <= lim; i++) { - u_int idx = ring->slot[i].buf_idx; - u_int len = ring->slot[i].len; - if (idx < 2 || idx >= netmap_total_buffers) { - RD(5, "bad index at slot %d idx %d len %d ", i, idx, len); - ring->slot[i].buf_idx = 0; - ring->slot[i].len = 0; - } else if (len > NETMAP_BDG_BUF_SIZE(kring->na->nm_mem)) { - ring->slot[i].len = 0; - RD(5, "bad len at slot %d idx %d len %d", i, idx, len); - } - } - if (errors) { - RD(10, "total %d errors", errors); - RD(10, "%s reinit, cur %d -> %d tail %d -> %d", - kring->name, - ring->cur, kring->nr_hwcur, - ring->tail, kring->nr_hwtail); - ring->head = kring->rhead = kring->nr_hwcur; - ring->cur = kring->rcur = kring->nr_hwcur; - ring->tail = kring->rtail = kring->nr_hwtail; - } - return (errors ? 1 : 0); -} - - -/* - * Set the ring ID. For devices with a single queue, a request - * for all rings is the same as a single ring. 
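As the function below shows, the binding is chosen by nr_flags (NR_REG_ALL_NIC, NR_REG_SW, NR_REG_ONE_NIC, ...) with nr_ringid carrying the ring index for NR_REG_ONE_NIC; the old NETMAP_SW_RING/NETMAP_HW_RING bits are only honored for backward compatibility. A request bound to a single hardware ring would be filled in roughly as follows (interface name and fd are just examples; fd is the open /dev/netmap descriptor):

    /* sketch: bind the fd to hw ring pair 2 of em0 */
    struct nmreq req;

    memset(&req, 0, sizeof(req));
    strncpy(req.nr_name, "em0", sizeof(req.nr_name) - 1);
    req.nr_version = NETMAP_API;
    req.nr_flags = NR_REG_ONE_NIC;
    req.nr_ringid = 2;                    /* ring index, see NETMAP_RING_MASK */
    ioctl(fd, NIOCREGIF, &req);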
- */ -static int -netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) -{ - struct netmap_adapter *na = priv->np_na; - u_int j, i = ringid & NETMAP_RING_MASK; - u_int reg = flags & NR_REG_MASK; - - if (reg == NR_REG_DEFAULT) { - /* convert from old ringid to flags */ - if (ringid & NETMAP_SW_RING) { - reg = NR_REG_SW; - } else if (ringid & NETMAP_HW_RING) { - reg = NR_REG_ONE_NIC; - } else { - reg = NR_REG_ALL_NIC; - } - D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg); - } - switch (reg) { - case NR_REG_ALL_NIC: - case NR_REG_PIPE_MASTER: - case NR_REG_PIPE_SLAVE: - priv->np_txqfirst = 0; - priv->np_txqlast = na->num_tx_rings; - priv->np_rxqfirst = 0; - priv->np_rxqlast = na->num_rx_rings; - ND("%s %d %d", "ALL/PIPE", - priv->np_rxqfirst, priv->np_rxqlast); - break; - case NR_REG_SW: - case NR_REG_NIC_SW: - if (!(na->na_flags & NAF_HOST_RINGS)) { - D("host rings not supported"); - return EINVAL; - } - priv->np_txqfirst = (reg == NR_REG_SW ? - na->num_tx_rings : 0); - priv->np_txqlast = na->num_tx_rings + 1; - priv->np_rxqfirst = (reg == NR_REG_SW ? - na->num_rx_rings : 0); - priv->np_rxqlast = na->num_rx_rings + 1; - ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW", - priv->np_rxqfirst, priv->np_rxqlast); - break; - case NR_REG_ONE_NIC: - if (i >= na->num_tx_rings && i >= na->num_rx_rings) { - D("invalid ring id %d", i); - return EINVAL; - } - /* if not enough rings, use the first one */ - j = i; - if (j >= na->num_tx_rings) - j = 0; - priv->np_txqfirst = j; - priv->np_txqlast = j + 1; - j = i; - if (j >= na->num_rx_rings) - j = 0; - priv->np_rxqfirst = j; - priv->np_rxqlast = j + 1; - break; - default: - D("invalid regif type %d", reg); - return EINVAL; - } - priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1; - priv->np_flags = (flags & ~NR_REG_MASK) | reg; - if (nm_tx_si_user(priv)) - na->tx_si_users++; - if (nm_rx_si_user(priv)) - na->rx_si_users++; - if (netmap_verbose) { - D("%s: tx [%d,%d) rx [%d,%d) id %d", - NM_IFPNAME(na->ifp), - priv->np_txqfirst, - priv->np_txqlast, - priv->np_rxqfirst, - priv->np_rxqlast, - i); - } - return 0; -} - -/* - * possibly move the interface to netmap-mode. - * If success it returns a pointer to netmap_if, otherwise NULL. - * This must be called with NMG_LOCK held. - */ -struct netmap_if * -netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, - uint16_t ringid, uint32_t flags, int *err) -{ - struct ifnet *ifp = na->ifp; - struct netmap_if *nifp = NULL; - int error, need_mem = 0; - - NMG_LOCK_ASSERT(); - /* ring configuration may have changed, fetch from the card */ - netmap_update_config(na); - priv->np_na = na; /* store the reference */ - error = netmap_set_ringid(priv, ringid, flags); - if (error) - goto out; - /* ensure allocators are ready */ - need_mem = !netmap_have_memory_locked(priv); - if (need_mem) { - error = netmap_get_memory_locked(priv); - ND("get_memory returned %d", error); - if (error) - goto out; - } - nifp = netmap_if_new(NM_IFPNAME(ifp), na); - if (nifp == NULL) { /* allocation failed */ - /* we should drop the allocator, but only - * if we were the ones who grabbed it - */ - error = ENOMEM; - goto out; - } - na->active_fds++; - if (ifp->if_capenable & IFCAP_NETMAP) { - /* was already set */ - } else { - /* Otherwise set the card in netmap mode - * and make it use the shared buffers. 
- * - * do not core lock because the race is harmless here, - * there cannot be any traffic to netmap_transmit() - */ - na->na_lut = na->nm_mem->pools[NETMAP_BUF_POOL].lut; - ND("%p->na_lut == %p", na, na->na_lut); - na->na_lut_objtotal = na->nm_mem->pools[NETMAP_BUF_POOL].objtotal; - error = na->nm_register(na, 1); /* mode on */ - if (error) { - netmap_do_unregif(priv, nifp); - nifp = NULL; - } - } -out: - *err = error; - if (error) { - priv->np_na = NULL; - if (need_mem) - netmap_drop_memory_locked(priv); - } - if (nifp != NULL) { - /* - * advertise that the interface is ready bt setting ni_nifp. - * The barrier is needed because readers (poll and *SYNC) - * check for priv->np_nifp != NULL without locking - */ - wmb(); /* make sure previous writes are visible to all CPUs */ - priv->np_nifp = nifp; - } - return nifp; -} - - - -/* - * ioctl(2) support for the "netmap" device. - * - * Following a list of accepted commands: - * - NIOCGINFO - * - SIOCGIFADDR just for convenience - * - NIOCREGIF - * - NIOCTXSYNC - * - NIOCRXSYNC - * - * Return 0 on success, errno otherwise. - */ -int -netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, - int fflag, struct thread *td) -{ - struct netmap_priv_d *priv = NULL; - struct ifnet *ifp = NULL; - struct nmreq *nmr = (struct nmreq *) data; - struct netmap_adapter *na = NULL; - int error; - u_int i, qfirst, qlast; - struct netmap_if *nifp; - struct netmap_kring *krings; - - (void)dev; /* UNUSED */ - (void)fflag; /* UNUSED */ - - if (cmd == NIOCGINFO || cmd == NIOCREGIF) { - /* truncate name */ - nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; - if (nmr->nr_version != NETMAP_API) { - D("API mismatch for %s got %d need %d", - nmr->nr_name, - nmr->nr_version, NETMAP_API); - nmr->nr_version = NETMAP_API; - } - if (nmr->nr_version < NETMAP_MIN_API || - nmr->nr_version > NETMAP_MAX_API) { - return EINVAL; - } - } - CURVNET_SET(TD_TO_VNET(td)); - - error = devfs_get_cdevpriv((void **)&priv); - if (error) { - CURVNET_RESTORE(); - /* XXX ENOENT should be impossible, since the priv - * is now created in the open */ - return (error == ENOENT ? 
ENXIO : error); - } - - switch (cmd) { - case NIOCGINFO: /* return capabilities etc */ - if (nmr->nr_cmd == NETMAP_BDG_LIST) { - error = netmap_bdg_ctl(nmr, NULL); - break; - } - - NMG_LOCK(); - do { - /* memsize is always valid */ - struct netmap_mem_d *nmd = &nm_mem; - u_int memflags; - - if (nmr->nr_name[0] != '\0') { - /* get a refcount */ - error = netmap_get_na(nmr, &na, 1 /* create */); - if (error) - break; - nmd = na->nm_mem; /* get memory allocator */ - } - - error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags, - &nmr->nr_arg2); - if (error) - break; - if (na == NULL) /* only memory info */ - break; - nmr->nr_offset = 0; - nmr->nr_rx_slots = nmr->nr_tx_slots = 0; - netmap_update_config(na); - nmr->nr_rx_rings = na->num_rx_rings; - nmr->nr_tx_rings = na->num_tx_rings; - nmr->nr_rx_slots = na->num_rx_desc; - nmr->nr_tx_slots = na->num_tx_desc; - netmap_adapter_put(na); - } while (0); - NMG_UNLOCK(); - break; - - case NIOCREGIF: - /* possibly attach/detach NIC and VALE switch */ - i = nmr->nr_cmd; - if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH - || i == NETMAP_BDG_VNET_HDR) { - error = netmap_bdg_ctl(nmr, NULL); - break; - } else if (i != 0) { - D("nr_cmd must be 0 not %d", i); - error = EINVAL; - break; - } - - /* protect access to priv from concurrent NIOCREGIF */ - NMG_LOCK(); - do { - u_int memflags; - - if (priv->np_na != NULL) { /* thread already registered */ - error = EBUSY; - break; - } - /* find the interface and a reference */ - error = netmap_get_na(nmr, &na, 1 /* create */); /* keep reference */ - if (error) - break; - ifp = na->ifp; - if (NETMAP_OWNED_BY_KERN(na)) { - netmap_adapter_put(na); - error = EBUSY; - break; - } - nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error); - if (!nifp) { /* reg. failed, release priv and ref */ - netmap_adapter_put(na); - priv->np_nifp = NULL; - break; - } - priv->np_td = td; // XXX kqueue, debugging only - - /* return the offset of the netmap_if object */ - nmr->nr_rx_rings = na->num_rx_rings; - nmr->nr_tx_rings = na->num_tx_rings; - nmr->nr_rx_slots = na->num_rx_desc; - nmr->nr_tx_slots = na->num_tx_desc; - error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags, - &nmr->nr_arg2); - if (error) { - netmap_adapter_put(na); - break; - } - if (memflags & NETMAP_MEM_PRIVATE) { - *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; - } - priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ? - &na->tx_si : &na->tx_rings[priv->np_txqfirst].si; - priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ? 
- &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si; - - if (nmr->nr_arg3) { - D("requested %d extra buffers", nmr->nr_arg3); - nmr->nr_arg3 = netmap_extra_alloc(na, - &nifp->ni_bufs_head, nmr->nr_arg3); - D("got %d extra buffers", nmr->nr_arg3); - } - nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); - } while (0); - NMG_UNLOCK(); - break; - - case NIOCTXSYNC: - case NIOCRXSYNC: - nifp = priv->np_nifp; - - if (nifp == NULL) { - error = ENXIO; - break; - } - rmb(); /* make sure following reads are not from cache */ - - na = priv->np_na; /* we have a reference */ - - if (na == NULL) { - D("Internal error: nifp != NULL && na == NULL"); - error = ENXIO; - break; - } - - ifp = na->ifp; - if (ifp == NULL) { - RD(1, "the ifp is gone"); - error = ENXIO; - break; - } - - if (cmd == NIOCTXSYNC) { - krings = na->tx_rings; - qfirst = priv->np_txqfirst; - qlast = priv->np_txqlast; - } else { - krings = na->rx_rings; - qfirst = priv->np_rxqfirst; - qlast = priv->np_rxqlast; - } - - for (i = qfirst; i < qlast; i++) { - struct netmap_kring *kring = krings + i; - if (nm_kr_tryget(kring)) { - error = EBUSY; - goto out; - } - if (cmd == NIOCTXSYNC) { - if (netmap_verbose & NM_VERB_TXSYNC) - D("pre txsync ring %d cur %d hwcur %d", - i, kring->ring->cur, - kring->nr_hwcur); - if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) { - netmap_ring_reinit(kring); - } else { - kring->nm_sync(kring, NAF_FORCE_RECLAIM); - } - if (netmap_verbose & NM_VERB_TXSYNC) - D("post txsync ring %d cur %d hwcur %d", - i, kring->ring->cur, - kring->nr_hwcur); - } else { - kring->nm_sync(kring, NAF_FORCE_READ); - microtime(&na->rx_rings[i].ring->ts); - } - nm_kr_put(kring); - } - - break; - -#ifdef __FreeBSD__ - case FIONBIO: - case FIOASYNC: - ND("FIONBIO/FIOASYNC are no-ops"); - break; - - case BIOCIMMEDIATE: - case BIOCGHDRCMPLT: - case BIOCSHDRCMPLT: - case BIOCSSEESENT: - D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT"); - break; - - default: /* allow device-specific ioctls */ - { - struct socket so; - - bzero(&so, sizeof(so)); - NMG_LOCK(); - error = netmap_get_na(nmr, &na, 0 /* don't create */); /* keep reference */ - if (error) { - netmap_adapter_put(na); - NMG_UNLOCK(); - break; - } - ifp = na->ifp; - so.so_vnet = ifp->if_vnet; - // so->so_proto not null. - error = ifioctl(&so, cmd, data, td); - netmap_adapter_put(na); - NMG_UNLOCK(); - break; - } - -#else /* linux */ - default: - error = EOPNOTSUPP; -#endif /* linux */ - } -out: - - CURVNET_RESTORE(); - return (error); -} - - -/* - * select(2) and poll(2) handlers for the "netmap" device. - * - * Can be called for one or more queues. - * Return true the event mask corresponding to ready events. - * If there are no ready events, do a selrecord on either individual - * selinfo or on the global one. - * Device-dependent parts (locking and sync of tx/rx rings) - * are done through callbacks. - * - * On linux, arguments are really pwait, the poll table, and 'td' is struct file * - * The first one is remapped to pwait as selrecord() uses the name as an - * hidden argument. 
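For reference, the canonical user-space bring-up that drives the NIOCREGIF path above is just a few calls (error handling omitted; a sketch, not code taken from the examples/ directory):

    /* sketch: open, register and map a port; needs <fcntl.h>, <sys/ioctl.h>,
     * <sys/mman.h> and <net/netmap_user.h> */
    int fd = open("/dev/netmap", O_RDWR);
    struct nmreq req;

    memset(&req, 0, sizeof(req));
    strncpy(req.nr_name, "em0", sizeof(req.nr_name) - 1);
    req.nr_version = NETMAP_API;
    req.nr_flags = NR_REG_ALL_NIC;
    ioctl(fd, NIOCREGIF, &req);           /* handled by the switch above */

    void *mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
                     MAP_SHARED, fd, 0);
    struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);
    /* rings are now reachable via NETMAP_TXRING()/NETMAP_RXRING() */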
- */ -int -netmap_poll(struct cdev *dev, int events, struct thread *td) -{ - struct netmap_priv_d *priv = NULL; - struct netmap_adapter *na; - struct ifnet *ifp; - struct netmap_kring *kring; - u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0; - struct mbq q; /* packets from hw queues to host stack */ - void *pwait = dev; /* linux compatibility */ - int is_kevent = 0; - - /* - * In order to avoid nested locks, we need to "double check" - * txsync and rxsync if we decide to do a selrecord(). - * retry_tx (and retry_rx, later) prevent looping forever. - */ - int retry_tx = 1, retry_rx = 1; - - (void)pwait; - mbq_init(&q); - - /* - * XXX kevent has curthread->tp_fop == NULL, - * so devfs_get_cdevpriv() fails. We circumvent this by passing - * priv as the first argument, which is also useful to avoid - * the selrecord() which are not necessary in that case. - */ - if (devfs_get_cdevpriv((void **)&priv) != 0) { - is_kevent = 1; - if (netmap_verbose) - D("called from kevent"); - priv = (struct netmap_priv_d *)dev; - } - if (priv == NULL) - return POLLERR; - - if (priv->np_nifp == NULL) { - D("No if registered"); - return POLLERR; - } - rmb(); /* make sure following reads are not from cache */ - - na = priv->np_na; - ifp = na->ifp; - // check for deleted - if (ifp == NULL) { - RD(1, "the ifp is gone"); - return POLLERR; - } - - if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) - return POLLERR; - - if (netmap_verbose & 0x8000) - D("device %s events 0x%x", NM_IFPNAME(ifp), events); - want_tx = events & (POLLOUT | POLLWRNORM); - want_rx = events & (POLLIN | POLLRDNORM); - - - /* - * check_all_{tx|rx} are set if the card has more than one queue AND - * the file descriptor is bound to all of them. If so, we sleep on - * the "global" selinfo, otherwise we sleep on individual selinfo - * (FreeBSD only allows two selinfo's per file descriptor). - * The interrupt routine in the driver wake one or the other - * (or both) depending on which clients are active. - * - * rxsync() is only called if we run out of buffers on a POLLIN. - * txsync() is called if we run out of buffers on POLLOUT, or - * there are pending packets to send. The latter can be disabled - * passing NETMAP_NO_TX_POLL in the NIOCREG call. - */ - check_all_tx = nm_tx_si_user(priv); - check_all_rx = nm_rx_si_user(priv); - - /* - * We start with a lock free round which is cheap if we have - * slots available. If this fails, then lock and call the sync - * routines. - */ - for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) { - kring = &na->rx_rings[i]; - /* XXX compare ring->cur and kring->tail */ - if (!nm_ring_empty(kring->ring)) { - revents |= want_rx; - want_rx = 0; /* also breaks the loop */ - } - } - for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) { - kring = &na->tx_rings[i]; - /* XXX compare ring->cur and kring->tail */ - if (!nm_ring_empty(kring->ring)) { - revents |= want_tx; - want_tx = 0; /* also breaks the loop */ - } - } - - /* - * If we want to push packets out (priv->np_txpoll) or - * want_tx is still set, we must issue txsync calls - * (on all rings, to avoid that the tx rings stall). - * XXX should also check cur != hwcur on the tx rings. - * Fortunately, normal tx mode has np_txpoll set. - */ - if (priv->np_txpoll || want_tx) { - /* - * The first round checks if anyone is ready, if not - * do a selrecord and another round to handle races. - * want_tx goes to 0 if any space is found, and is - * used to skip rings with no pending transmissions. 
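As the comment above notes, an application that only transmits explicitly through NIOCTXSYNC can avoid the txsync-on-poll work by setting the corresponding bit at registration time, e.g.:

    /* sketch: suppress the implicit txsync on poll() */
    req.nr_ringid |= NETMAP_NO_TX_POLL;   /* set before the NIOCREGIF ioctl */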
- */ -flush_tx: - for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { - int found = 0; - - kring = &na->tx_rings[i]; - if (!want_tx && kring->ring->cur == kring->nr_hwcur) - continue; - /* only one thread does txsync */ - if (nm_kr_tryget(kring)) { - if (netmap_verbose) - RD(2, "%p lost race on txring %d, ok", - priv, i); - continue; - } - if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) { - netmap_ring_reinit(kring); - revents |= POLLERR; - } else { - if (kring->nm_sync(kring, 0)) - revents |= POLLERR; - } - - /* - * If we found new slots, notify potential - * listeners on the same ring. - * Since we just did a txsync, look at the copies - * of cur,tail in the kring. - */ - found = kring->rcur != kring->rtail; - nm_kr_put(kring); - if (found) { /* notify other listeners */ - revents |= want_tx; - want_tx = 0; - na->nm_notify(na, i, NR_TX, 0); - } - } - if (want_tx && retry_tx && !is_kevent) { - selrecord(td, check_all_tx ? - &na->tx_si : &na->tx_rings[priv->np_txqfirst].si); - retry_tx = 0; - goto flush_tx; - } - } - - /* - * If want_rx is still set scan receive rings. - * Do it on all rings because otherwise we starve. - */ - if (want_rx) { - int send_down = 0; /* transparent mode */ - /* two rounds here to for race avoidance */ -do_retry_rx: - for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { - int found = 0; - - kring = &na->rx_rings[i]; - - if (nm_kr_tryget(kring)) { - if (netmap_verbose) - RD(2, "%p lost race on rxring %d, ok", - priv, i); - continue; - } - - /* - * transparent mode support: collect packets - * from the rxring(s). - * XXX NR_FORWARD should only be read on - * physical or NIC ports - */ - if (netmap_fwd ||kring->ring->flags & NR_FORWARD) { - ND(10, "forwarding some buffers up %d to %d", - kring->nr_hwcur, kring->ring->cur); - netmap_grab_packets(kring, &q, netmap_fwd); - } - - if (kring->nm_sync(kring, 0)) - revents |= POLLERR; - if (netmap_no_timestamp == 0 || - kring->ring->flags & NR_TIMESTAMP) { - microtime(&kring->ring->ts); - } - /* after an rxsync we can use kring->rcur, rtail */ - found = kring->rcur != kring->rtail; - nm_kr_put(kring); - if (found) { - revents |= want_rx; - retry_rx = 0; - na->nm_notify(na, i, NR_RX, 0); - } - } - - /* transparent mode XXX only during first pass ? */ - if (na->na_flags & NAF_HOST_RINGS) { - kring = &na->rx_rings[na->num_rx_rings]; - if (check_all_rx - && (netmap_fwd || kring->ring->flags & NR_FORWARD)) { - /* XXX fix to use kring fields */ - if (nm_ring_empty(kring->ring)) - send_down = netmap_rxsync_from_host(na, td, dev); - if (!nm_ring_empty(kring->ring)) - revents |= want_rx; - } - } - - if (retry_rx && !is_kevent) - selrecord(td, check_all_rx ? - &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si); - if (send_down > 0 || retry_rx) { - retry_rx = 0; - if (send_down) - goto flush_tx; /* and retry_rx */ - else - goto do_retry_rx; - } - } - - /* - * Transparent mode: marked bufs on rx rings between - * kring->nr_hwcur and ring->head - * are passed to the other endpoint. - * - * In this mode we also scan the sw rxring, which in - * turn passes packets up. - * - * XXX Transparent mode at the moment requires to bind all - * rings to a single file descriptor. 
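The user-space counterpart of this poll handler is a plain poll()/drain loop; continuing the registration sketch given earlier (consume_frame() is a placeholder):

    /* sketch: blocking receive loop over all bound rx rings */
    struct pollfd pfd = { .fd = fd, .events = POLLIN };
    unsigned int r;

    for (;;) {
            poll(&pfd, 1, -1);            /* ends up in netmap_poll() */
            for (r = 0; r < nifp->ni_rx_rings; r++) {
                    struct netmap_ring *ring = NETMAP_RXRING(nifp, r);

                    while (!nm_ring_empty(ring)) {
                            struct netmap_slot *slot = &ring->slot[ring->cur];

                            consume_frame(NETMAP_BUF(ring, slot->buf_idx), slot->len);
                            ring->head = ring->cur = nm_ring_next(ring, ring->cur);
                    }
            }
    }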
- */ - - if (q.head) - netmap_send_up(na->ifp, &q); - - return (revents); -} - - -/*-------------------- driver support routines -------------------*/ - -static int netmap_hw_krings_create(struct netmap_adapter *); - -static int -netmap_notify(struct netmap_adapter *na, u_int n_ring, - enum txrx tx, int flags) -{ - struct netmap_kring *kring; - - if (tx == NR_TX) { - kring = na->tx_rings + n_ring; - OS_selwakeup(&kring->si, PI_NET); - if (na->tx_si_users > 0) - OS_selwakeup(&na->tx_si, PI_NET); - } else { - kring = na->rx_rings + n_ring; - OS_selwakeup(&kring->si, PI_NET); - if (na->rx_si_users > 0) - OS_selwakeup(&na->rx_si, PI_NET); - } - return 0; -} - - -// XXX check handling of failures -int -netmap_attach_common(struct netmap_adapter *na) -{ - struct ifnet *ifp = na->ifp; - - if (na->num_tx_rings == 0 || na->num_rx_rings == 0) { - D("%s: invalid rings tx %d rx %d", - ifp->if_xname, na->num_tx_rings, na->num_rx_rings); - return EINVAL; - } - WNA(ifp) = na; - - /* the following is only needed for na that use the host port. - * XXX do we have something similar for linux ? - */ -#ifdef __FreeBSD__ - na->if_input = ifp->if_input; /* for netmap_send_up */ -#endif /* __FreeBSD__ */ - - NETMAP_SET_CAPABLE(ifp); - if (na->nm_krings_create == NULL) { - na->nm_krings_create = netmap_hw_krings_create; - na->nm_krings_delete = netmap_hw_krings_delete; - } - if (na->nm_notify == NULL) - na->nm_notify = netmap_notify; - na->active_fds = 0; - - if (na->nm_mem == NULL) - na->nm_mem = &nm_mem; - return 0; -} - - -void -netmap_detach_common(struct netmap_adapter *na) -{ - if (na->ifp) - WNA(na->ifp) = NULL; /* XXX do we need this? */ - - if (na->tx_rings) { /* XXX should not happen */ - D("freeing leftover tx_rings"); - na->nm_krings_delete(na); - } - netmap_pipe_dealloc(na); - if (na->na_flags & NAF_MEM_OWNER) - netmap_mem_private_delete(na->nm_mem); - bzero(na, sizeof(*na)); - free(na, M_DEVBUF); -} - - -/* - * Initialize a ``netmap_adapter`` object created by driver on attach. - * We allocate a block of memory with room for a struct netmap_adapter - * plus two sets of N+2 struct netmap_kring (where N is the number - * of hardware rings): - * krings 0..N-1 are for the hardware queues. - * kring N is for the host stack queue - * kring N+1 is only used for the selinfo for all queues. // XXX still true ? - * Return 0 on success, ENOMEM otherwise. - */ -int -netmap_attach(struct netmap_adapter *arg) -{ - struct netmap_hw_adapter *hwna = NULL; - // XXX when is arg == NULL ? - struct ifnet *ifp = arg ? arg->ifp : NULL; - - if (arg == NULL || ifp == NULL) - goto fail; - hwna = malloc(sizeof(*hwna), M_DEVBUF, M_NOWAIT | M_ZERO); - if (hwna == NULL) - goto fail; - hwna->up = *arg; - hwna->up.na_flags |= NAF_HOST_RINGS; - if (netmap_attach_common(&hwna->up)) { - free(hwna, M_DEVBUF); - goto fail; - } - netmap_adapter_get(&hwna->up); - -#ifdef linux - if (ifp->netdev_ops) { - /* prepare a clone of the netdev ops */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) - hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops; -#else - hwna->nm_ndo = *ifp->netdev_ops; -#endif - } - hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit; - if (ifp->ethtool_ops) { - hwna->nm_eto = *ifp->ethtool_ops; - } - hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam; -#ifdef ETHTOOL_SCHANNELS - hwna->nm_eto.set_channels = linux_netmap_set_channels; -#endif -#endif /* linux */ - - D("success for %s", NM_IFPNAME(ifp)); - return 0; - -fail: - D("fail, arg %p ifp %p na %p", arg, ifp, hwna); - netmap_detach(ifp); - return (hwna ? 
EINVAL : ENOMEM); -} - - -void -NM_DBG(netmap_adapter_get)(struct netmap_adapter *na) -{ - if (!na) { - return; - } - - refcount_acquire(&na->na_refcount); -} - - -/* returns 1 iff the netmap_adapter is destroyed */ -int -NM_DBG(netmap_adapter_put)(struct netmap_adapter *na) -{ - if (!na) - return 1; - - if (!refcount_release(&na->na_refcount)) - return 0; - - if (na->nm_dtor) - na->nm_dtor(na); - - netmap_detach_common(na); - - return 1; -} - -int -netmap_hw_krings_create(struct netmap_adapter *na) -{ - int ret = netmap_krings_create(na, 0); - if (ret == 0) { - /* initialize the mbq for the sw rx ring */ - mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue); - ND("initialized sw rx queue %d", na->num_rx_rings); - } - return ret; -} - - - -/* - * Free the allocated memory linked to the given ``netmap_adapter`` - * object. - */ -void -netmap_detach(struct ifnet *ifp) -{ - struct netmap_adapter *na = NA(ifp); - - if (!na) - return; - - NMG_LOCK(); - netmap_disable_all_rings(ifp); - if (!netmap_adapter_put(na)) { - /* someone is still using the adapter, - * tell them that the interface is gone - */ - na->ifp = NULL; - /* give them a chance to notice */ - netmap_enable_all_rings(ifp); - } - NMG_UNLOCK(); -} - - -/* - * Intercept packets from the network stack and pass them - * to netmap as incoming packets on the 'software' ring. - * - * We only store packets in a bounded mbq and then copy them - * in the relevant rxsync routine. - * - * We rely on the OS to make sure that the ifp and na do not go - * away (typically the caller checks for IFF_DRV_RUNNING or the like). - * In nm_register() or whenever there is a reinitialization, - * we make sure to make the mode change visible here. - */ -int -netmap_transmit(struct ifnet *ifp, struct mbuf *m) -{ - struct netmap_adapter *na = NA(ifp); - struct netmap_kring *kring; - u_int len = MBUF_LEN(m); - u_int error = ENOBUFS; - struct mbq *q; - int space; - - // XXX [Linux] we do not need this lock - // if we follow the down/configure/up protocol -gl - // mtx_lock(&na->core_lock); - - if ( (ifp->if_capenable & IFCAP_NETMAP) == 0) { - D("%s not in netmap mode anymore", NM_IFPNAME(ifp)); - error = ENXIO; - goto done; - } - - kring = &na->rx_rings[na->num_rx_rings]; - q = &kring->rx_queue; - - // XXX reconsider long packets if we handle fragments - if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */ - D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp), - len, NETMAP_BDG_BUF_SIZE(na->nm_mem)); - goto done; - } - - /* protect against rxsync_from_host(), netmap_sw_to_nic() - * and maybe other instances of netmap_transmit (the latter - * not possible on Linux). - * Also avoid overflowing the queue. - */ - mtx_lock(&q->lock); - - space = kring->nr_hwtail - kring->nr_hwcur; - if (space < 0) - space += kring->nkr_num_slots; - if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX - RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p", - NM_IFPNAME(ifp), kring->nr_hwcur, kring->nr_hwtail, mbq_len(q), - len, m); - } else { - mbq_enqueue(q, m); - ND(10, "%s %d bufs in queue len %d m %p", - NM_IFPNAME(ifp), mbq_len(q), len, m); - /* notify outside the lock */ - m = NULL; - error = 0; - } - mtx_unlock(&q->lock); - -done: - if (m) - m_freem(m); - /* unconditionally wake up listeners */ - na->nm_notify(na, na->num_rx_rings, NR_RX, 0); - - return (error); -} - - -/* - * netmap_reset() is called by the driver routines when reinitializing - * a ring. The driver is in charge of locking to protect the kring. 
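netmap_attach(), netmap_transmit() and netmap_reset() make up most of the driver-facing API. A patched driver typically registers itself along these lines (the adapter/my_* names are hypothetical driver-side identifiers):

    /* sketch: native netmap registration from a driver attach routine */
    struct netmap_adapter na;

    bzero(&na, sizeof(na));
    na.ifp = adapter->ifp;
    na.num_tx_desc = adapter->num_tx_desc;
    na.num_rx_desc = adapter->num_rx_desc;
    na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
    na.nm_txsync = my_netmap_txsync;      /* driver-provided callbacks */
    na.nm_rxsync = my_netmap_rxsync;
    na.nm_register = my_netmap_reg;
    netmap_attach(&na);                   /* copied into a netmap_hw_adapter */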
- * If native netmap mode is not set just return NULL. - */ -struct netmap_slot * -netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, - u_int new_cur) -{ - struct netmap_kring *kring; - int new_hwofs, lim; - - if (na == NULL) { - D("NULL na, should not happen"); - return NULL; /* no netmap support here */ - } - if (!(na->ifp->if_capenable & IFCAP_NETMAP)) { - ND("interface not in netmap mode"); - return NULL; /* nothing to reinitialize */ - } - - /* XXX note- in the new scheme, we are not guaranteed to be - * under lock (e.g. when called on a device reset). - * In this case, we should set a flag and do not trust too - * much the values. In practice: TODO - * - set a RESET flag somewhere in the kring - * - do the processing in a conservative way - * - let the *sync() fixup at the end. - */ - if (tx == NR_TX) { - if (n >= na->num_tx_rings) - return NULL; - kring = na->tx_rings + n; - // XXX check whether we should use hwcur or rcur - new_hwofs = kring->nr_hwcur - new_cur; - } else { - if (n >= na->num_rx_rings) - return NULL; - kring = na->rx_rings + n; - new_hwofs = kring->nr_hwtail - new_cur; - } - lim = kring->nkr_num_slots - 1; - if (new_hwofs > lim) - new_hwofs -= lim + 1; - - /* Always set the new offset value and realign the ring. */ - if (netmap_verbose) - D("%s %s%d hwofs %d -> %d, hwtail %d -> %d", - NM_IFPNAME(na->ifp), - tx == NR_TX ? "TX" : "RX", n, - kring->nkr_hwofs, new_hwofs, - kring->nr_hwtail, - tx == NR_TX ? lim : kring->nr_hwtail); - kring->nkr_hwofs = new_hwofs; - if (tx == NR_TX) { - kring->nr_hwtail = kring->nr_hwcur + lim; - if (kring->nr_hwtail > lim) - kring->nr_hwtail -= lim + 1; - } - -#if 0 // def linux - /* XXX check that the mappings are correct */ - /* need ring_nr, adapter->pdev, direction */ - buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE); - if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { - D("error mapping rx netmap buffer %d", i); - // XXX fix error handling - } - -#endif /* linux */ - /* - * Wakeup on the individual and global selwait - * We do the wakeup here, but the ring is not yet reconfigured. - * However, we are under lock so there are no races. - */ - na->nm_notify(na, n, tx, 0); - return kring->ring->slot; -} - - -/* - * Dispatch rx/tx interrupts to the netmap rings. - * - * "work_done" is non-null on the RX path, NULL for the TX path. - * We rely on the OS to make sure that there is only one active - * instance per queue, and that there is appropriate locking. - * - * The 'notify' routine depends on what the ring is attached to. - * - for a netmap file descriptor, do a selwakeup on the individual - * waitqueue, plus one on the global one if needed - * - for a switch, call the proper forwarding routine - * - XXX more ? - */ -void -netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done) -{ - struct netmap_adapter *na = NA(ifp); - struct netmap_kring *kring; - - q &= NETMAP_RING_MASK; - - if (netmap_verbose) { - RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q); - } - - if (work_done) { /* RX path */ - if (q >= na->num_rx_rings) - return; // not a physical queue - kring = na->rx_rings + q; - kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ? - na->nm_notify(na, q, NR_RX, 0); - *work_done = 1; /* do not fire napi again */ - } else { /* TX path */ - if (q >= na->num_tx_rings) - return; // not a physical queue - kring = na->tx_rings + q; - na->nm_notify(na, q, NR_TX, 0); - } -} - - -/* - * Default functions to handle rx/tx interrupts from a physical device. 
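In a patched driver the hook into netmap_rx_irq() below is a one-line test at the top of the rx interrupt (or NAPI) handler, so that the regular mbuf path is skipped whenever netmap owns the ring (sketch; the surrounding driver variables are hypothetical):

    /* sketch: rx interrupt handler of a patched driver */
    u_int work_done = 0;

    if (netmap_rx_irq(adapter->ifp, ring_nr, &work_done))
            return;                       /* ring serviced by netmap */
    /* ...regular mbuf-based rx processing follows... */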
- * "work_done" is non-null on the RX path, NULL for the TX path. - * - * If the card is not in netmap mode, simply return 0, - * so that the caller proceeds with regular processing. - * Otherwise call netmap_common_irq() and return 1. - * - * If the card is connected to a netmap file descriptor, - * do a selwakeup on the individual queue, plus one on the global one - * if needed (multiqueue card _and_ there are multiqueue listeners), - * and return 1. - * - * Finally, if called on rx from an interface connected to a switch, - * calls the proper forwarding routine, and return 1. - */ -int -netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done) -{ - // XXX could we check NAF_NATIVE_ON ? - if (!(ifp->if_capenable & IFCAP_NETMAP)) - return 0; - - if (NA(ifp)->na_flags & NAF_SKIP_INTR) { - ND("use regular interrupt"); - return 0; - } - - netmap_common_irq(ifp, q, work_done); - return 1; -} - - -/* - * Module loader and unloader - * - * netmap_init() creates the /dev/netmap device and initializes - * all global variables. Returns 0 on success, errno on failure - * (but there is no chance) - * - * netmap_fini() destroys everything. - */ - -static struct cdev *netmap_dev; /* /dev/netmap character device. */ -extern struct cdevsw netmap_cdevsw; - - -void -netmap_fini(void) -{ - // XXX destroy_bridges() ? - if (netmap_dev) - destroy_dev(netmap_dev); - netmap_mem_fini(); - NMG_LOCK_DESTROY(); - printf("netmap: unloaded module.\n"); -} - - -int -netmap_init(void) -{ - int error; - - NMG_LOCK_INIT(); - - error = netmap_mem_init(); - if (error != 0) - goto fail; - /* XXX could use make_dev_credv() to get error number */ - netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660, - "netmap"); - if (!netmap_dev) - goto fail; - - netmap_init_bridges(); - printf("netmap: loaded module\n"); - return (0); -fail: - netmap_fini(); - return (EINVAL); /* may be incorrect */ -} diff --git a/netmap/sys/dev/netmap/netmap_freebsd.c b/netmap/sys/dev/netmap/netmap_freebsd.c deleted file mode 100644 index f3fa8ba..0000000 --- a/netmap/sys/dev/netmap/netmap_freebsd.c +++ /dev/null @@ -1,657 +0,0 @@ -/* - * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* $FreeBSD$ */ - -#include -#include -#include -#include /* defines used in kernel.h */ -#include /* POLLIN, POLLOUT */ -#include /* types used in module initialization */ -#include /* DEV_MODULE */ -#include - -#include - -#include /* vtophys */ -#include /* vtophys */ -#include -#include -#include -#include -#include - - -#include -#include /* sockaddrs */ -#include -#include -#include -#include /* bus_dmamap_* */ -#include /* in6_cksum_pseudo() */ -#include /* in_pseudo(), in_cksum_hdr() */ - -#include -#include -#include - - -/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ - -rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) -{ - /* TODO XXX please use the FreeBSD implementation for this. */ - uint16_t *words = (uint16_t *)data; - int nw = len / 2; - int i; - - for (i = 0; i < nw; i++) - cur_sum += be16toh(words[i]); - - if (len & 1) - cur_sum += (data[len-1] << 8); - - return cur_sum; -} - -/* Fold a raw checksum: 'cur_sum' is in host byte order, while the - * return value is in network byte order. - */ -uint16_t nm_csum_fold(rawsum_t cur_sum) -{ - /* TODO XXX please use the FreeBSD implementation for this. */ - while (cur_sum >> 16) - cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16); - - return htobe16((~cur_sum) & 0xFFFF); -} - -uint16_t nm_csum_ipv4(struct nm_iphdr *iph) -{ -#if 0 - return in_cksum_hdr((void *)iph); -#else - return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0)); -#endif -} - -void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, - size_t datalen, uint16_t *check) -{ - uint16_t pseudolen = datalen + iph->protocol; - - /* Compute and insert the pseudo-header cheksum. */ - *check = in_pseudo(iph->saddr, iph->daddr, - htobe16(pseudolen)); - /* Compute the checksum on TCP/UDP header + payload - * (includes the pseudo-header). - */ - *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); -} - -void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, - size_t datalen, uint16_t *check) -{ -#ifdef INET6 - *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0); - *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); -#else - static int notsupported = 0; - if (!notsupported) { - notsupported = 1; - D("inet6 segmentation not supported"); - } -#endif -} - - -/* - * Intercept the rx routine in the standard device driver. - * Second argument is non-zero to intercept, 0 to restore - */ -int -netmap_catch_rx(struct netmap_adapter *na, int intercept) -{ - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; - struct ifnet *ifp = na->ifp; - - if (intercept) { - if (gna->save_if_input) { - D("cannot intercept again"); - return EINVAL; /* already set */ - } - gna->save_if_input = ifp->if_input; - ifp->if_input = generic_rx_handler; - } else { - if (!gna->save_if_input){ - D("cannot restore"); - return EINVAL; /* not saved */ - } - ifp->if_input = gna->save_if_input; - gna->save_if_input = NULL; - } - - return 0; -} - - -/* - * Intercept the packet steering routine in the tx path, - * so that we can decide which queue is used for an mbuf. - * Second argument is non-zero to intercept, 0 to restore. - * On freebsd we just intercept if_transmit. 
- */ -void -netmap_catch_tx(struct netmap_generic_adapter *gna, int enable) -{ - struct netmap_adapter *na = &gna->up.up; - struct ifnet *ifp = na->ifp; - - if (enable) { - na->if_transmit = ifp->if_transmit; - ifp->if_transmit = netmap_transmit; - } else { - ifp->if_transmit = na->if_transmit; - } -} - - -/* - * Transmit routine used by generic_netmap_txsync(). Returns 0 on success - * and non-zero on error (which may be packet drops or other errors). - * addr and len identify the netmap buffer, m is the (preallocated) - * mbuf to use for transmissions. - * - * We should add a reference to the mbuf so the m_freem() at the end - * of the transmission does not consume resources. - * - * On FreeBSD, and on multiqueue cards, we can force the queue using - * if ((m->m_flags & M_FLOWID) != 0) - * i = m->m_pkthdr.flowid % adapter->num_queues; - * else - * i = curcpu % adapter->num_queues; - * - */ -int -generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, - void *addr, u_int len, u_int ring_nr) -{ - int ret; - - m->m_len = m->m_pkthdr.len = 0; - - // copy data to the mbuf - m_copyback(m, 0, len, addr); - // inc refcount. We are alone, so we can skip the atomic - atomic_fetchadd_int(m->m_ext.ref_cnt, 1); - m->m_flags |= M_FLOWID; - m->m_pkthdr.flowid = ring_nr; - m->m_pkthdr.rcvif = ifp; /* used for tx notification */ - ret = NA(ifp)->if_transmit(ifp, m); - return ret; -} - - -/* - * The following two functions are empty until we have a generic - * way to extract the info from the ifp - */ -int -generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) -{ - D("called"); - return 0; -} - - -void -generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) -{ - D("called"); - *txq = netmap_generic_rings; - *rxq = netmap_generic_rings; -} - - -void netmap_mitigation_init(struct nm_generic_mit *mit, int idx, - struct netmap_adapter *na) -{ - ND("called"); - mit->mit_pending = 0; - mit->mit_ring_idx = idx; - mit->mit_na = na; -} - - -void netmap_mitigation_start(struct nm_generic_mit *mit) -{ - ND("called"); -} - - -void netmap_mitigation_restart(struct nm_generic_mit *mit) -{ - ND("called"); -} - - -int netmap_mitigation_active(struct nm_generic_mit *mit) -{ - ND("called"); - return 0; -} - - -void netmap_mitigation_cleanup(struct nm_generic_mit *mit) -{ - ND("called"); -} - - -/* - * In order to track whether pages are still mapped, we hook into - * the standard cdev_pager and intercept the constructor and - * destructor. 
- */ - -struct netmap_vm_handle_t { - struct cdev *dev; - struct netmap_priv_d *priv; -}; - - -static int -netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, - vm_ooffset_t foff, struct ucred *cred, u_short *color) -{ - struct netmap_vm_handle_t *vmh = handle; - - if (netmap_verbose) - D("handle %p size %jd prot %d foff %jd", - handle, (intmax_t)size, prot, (intmax_t)foff); - dev_ref(vmh->dev); - return 0; -} - - -static void -netmap_dev_pager_dtor(void *handle) -{ - struct netmap_vm_handle_t *vmh = handle; - struct cdev *dev = vmh->dev; - struct netmap_priv_d *priv = vmh->priv; - - if (netmap_verbose) - D("handle %p", handle); - netmap_dtor(priv); - free(vmh, M_DEVBUF); - dev_rel(dev); -} - - -static int -netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, - int prot, vm_page_t *mres) -{ - struct netmap_vm_handle_t *vmh = object->handle; - struct netmap_priv_d *priv = vmh->priv; - vm_paddr_t paddr; - vm_page_t page; - vm_memattr_t memattr; - vm_pindex_t pidx; - - ND("object %p offset %jd prot %d mres %p", - object, (intmax_t)offset, prot, mres); - memattr = object->memattr; - pidx = OFF_TO_IDX(offset); - paddr = netmap_mem_ofstophys(priv->np_mref, offset); - if (paddr == 0) - return VM_PAGER_FAIL; - - if (((*mres)->flags & PG_FICTITIOUS) != 0) { - /* - * If the passed in result page is a fake page, update it with - * the new physical address. - */ - page = *mres; - vm_page_updatefake(page, paddr, memattr); - } else { - /* - * Replace the passed in reqpage page with our own fake page and - * free up the all of the original pages. - */ -#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */ -#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK -#define VM_OBJECT_WLOCK VM_OBJECT_LOCK -#endif /* VM_OBJECT_WUNLOCK */ - - VM_OBJECT_WUNLOCK(object); - page = vm_page_getfake(paddr, memattr); - VM_OBJECT_WLOCK(object); - vm_page_lock(*mres); - vm_page_free(*mres); - vm_page_unlock(*mres); - *mres = page; - vm_page_insert(page, object, pidx); - } - page->valid = VM_PAGE_BITS_ALL; - return (VM_PAGER_OK); -} - - -static struct cdev_pager_ops netmap_cdev_pager_ops = { - .cdev_pg_ctor = netmap_dev_pager_ctor, - .cdev_pg_dtor = netmap_dev_pager_dtor, - .cdev_pg_fault = netmap_dev_pager_fault, -}; - - -static int -netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff, - vm_size_t objsize, vm_object_t *objp, int prot) -{ - int error; - struct netmap_vm_handle_t *vmh; - struct netmap_priv_d *priv; - vm_object_t obj; - - if (netmap_verbose) - D("cdev %p foff %jd size %jd objp %p prot %d", cdev, - (intmax_t )*foff, (intmax_t )objsize, objp, prot); - - vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (vmh == NULL) - return ENOMEM; - vmh->dev = cdev; - - NMG_LOCK(); - error = devfs_get_cdevpriv((void**)&priv); - if (error) - goto err_unlock; - vmh->priv = priv; - priv->np_refcount++; - NMG_UNLOCK(); - - error = netmap_get_memory(priv); - if (error) - goto err_deref; - - obj = cdev_pager_allocate(vmh, OBJT_DEVICE, - &netmap_cdev_pager_ops, objsize, prot, - *foff, NULL); - if (obj == NULL) { - D("cdev_pager_allocate failed"); - error = EINVAL; - goto err_deref; - } - - *objp = obj; - return 0; - -err_deref: - NMG_LOCK(); - priv->np_refcount--; -err_unlock: - NMG_UNLOCK(); -// err: - free(vmh, M_DEVBUF); - return error; -} - - -// XXX can we remove this ? 
-static int -netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) -{ - if (netmap_verbose) - D("dev %p fflag 0x%x devtype %d td %p", - dev, fflag, devtype, td); - return 0; -} - - -static int -netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) -{ - struct netmap_priv_d *priv; - int error; - - (void)dev; - (void)oflags; - (void)devtype; - (void)td; - - // XXX wait or nowait ? - priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (priv == NULL) - return ENOMEM; - - error = devfs_set_cdevpriv(priv, netmap_dtor); - if (error) - return error; - - priv->np_refcount = 1; - - return 0; -} - -/******************** kqueue support ****************/ - -/* - * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED. - * We use a non-zero argument to distinguish the call from the one - * in kevent_scan() which instead also needs to run netmap_poll(). - * The knote uses a global mutex for the time being. We might - * try to reuse the one in the si, but it is not allocated - * permanently so it might be a bit tricky. - * - * The *kqfilter function registers one or another f_event - * depending on read or write mode. - * In the call to f_event() td_fpop is NULL so any child function - * calling devfs_get_cdevpriv() would fail - and we need it in - * netmap_poll(). As a workaround we store priv into kn->kn_hook - * and pass it as first argument to netmap_poll(), which then - * uses the failure to tell that we are called from f_event() - * and do not need the selrecord(). - */ - -void freebsd_selwakeup(struct selinfo *si, int pri); - -void -freebsd_selwakeup(struct selinfo *si, int pri) -{ - if (netmap_verbose) - D("on knote %p", &si->si_note); - selwakeuppri(si, pri); - /* use a non-zero hint to tell the notification from the - * call done in kqueue_scan() which uses 0 - */ - KNOTE_UNLOCKED(&si->si_note, 0x100 /* notification */); -} - -static void -netmap_knrdetach(struct knote *kn) -{ - struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; - struct selinfo *si = priv->np_rxsi; - - D("remove selinfo %p", si); - knlist_remove(&si->si_note, kn, 0); -} - -static void -netmap_knwdetach(struct knote *kn) -{ - struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; - struct selinfo *si = priv->np_txsi; - - D("remove selinfo %p", si); - knlist_remove(&si->si_note, kn, 0); -} - -/* - * callback from notifies (generated externally) and our - * calls to kevent(). The former we just return 1 (ready) - * since we do not know better. - * In the latter we call netmap_poll and return 0/1 accordingly. - */ -static int -netmap_knrw(struct knote *kn, long hint, int events) -{ - struct netmap_priv_d *priv; - int revents; - - if (hint != 0) { - ND(5, "call from notify"); - return 1; /* assume we are ready */ - } - priv = kn->kn_hook; - /* the notification may come from an external thread, - * in which case we do not want to run the netmap_poll - * This should be filtered above, but check just in case. - */ - if (curthread != priv->np_td) { /* should not happen */ - RD(5, "curthread changed %p %p", curthread, priv->np_td); - return 1; - } else { - revents = netmap_poll((void *)priv, events, curthread); - return (events & revents) ? 
1 : 0; - } -} - -static int -netmap_knread(struct knote *kn, long hint) -{ - return netmap_knrw(kn, hint, POLLIN); -} - -static int -netmap_knwrite(struct knote *kn, long hint) -{ - return netmap_knrw(kn, hint, POLLOUT); -} - -static struct filterops netmap_rfiltops = { - .f_isfd = 1, - .f_detach = netmap_knrdetach, - .f_event = netmap_knread, -}; - -static struct filterops netmap_wfiltops = { - .f_isfd = 1, - .f_detach = netmap_knwdetach, - .f_event = netmap_knwrite, -}; - - -/* - * This is called when a thread invokes kevent() to record - * a change in the configuration of the kqueue(). - * The 'priv' should be the same as in the netmap device. - */ -static int -netmap_kqfilter(struct cdev *dev, struct knote *kn) -{ - struct netmap_priv_d *priv; - int error; - struct netmap_adapter *na; - struct selinfo *si; - int ev = kn->kn_filter; - - if (ev != EVFILT_READ && ev != EVFILT_WRITE) { - D("bad filter request %d", ev); - return 1; - } - error = devfs_get_cdevpriv((void**)&priv); - if (error) { - D("device not yet setup"); - return 1; - } - na = priv->np_na; - if (na == NULL) { - D("no netmap adapter for this file descriptor"); - return 1; - } - /* the si is indicated in the priv */ - si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi; - // XXX lock(priv) ? - kn->kn_fop = (ev == EVFILT_WRITE) ? - &netmap_wfiltops : &netmap_rfiltops; - kn->kn_hook = priv; - knlist_add(&si->si_note, kn, 1); - // XXX unlock(priv) - ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s", - na, na->ifp->if_xname, curthread, priv, kn, - priv->np_nifp, - kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH"); - return 0; -} - -struct cdevsw netmap_cdevsw = { - .d_version = D_VERSION, - .d_name = "netmap", - .d_open = netmap_open, - .d_mmap_single = netmap_mmap_single, - .d_ioctl = netmap_ioctl, - .d_poll = netmap_poll, - .d_kqfilter = netmap_kqfilter, - .d_close = netmap_close, -}; -/*--- end of kqueue support ----*/ - -/* - * Kernel entry point. - * - * Initialize/finalize the module and return. - * - * Return 0 on success, errno on failure. - */ -static int -netmap_loader(__unused struct module *module, int event, __unused void *arg) -{ - int error = 0; - - switch (event) { - case MOD_LOAD: - error = netmap_init(); - break; - - case MOD_UNLOAD: - netmap_fini(); - break; - - default: - error = EOPNOTSUPP; - break; - } - - return (error); -} - - -DEV_MODULE(netmap, netmap_loader, NULL); diff --git a/netmap/sys/dev/netmap/netmap_generic.c b/netmap/sys/dev/netmap/netmap_generic.c deleted file mode 100644 index 829f913..0000000 --- a/netmap/sys/dev/netmap/netmap_generic.c +++ /dev/null @@ -1,818 +0,0 @@ -/* - * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * This module implements netmap support on top of standard, - * unmodified device drivers. - * - * A NIOCREGIF request is handled here if the device does not - * have native support. TX and RX rings are emulated as follows: - * - * NIOCREGIF - * We preallocate a block of TX mbufs (roughly as many as - * tx descriptors; the number is not critical) to speed up - * operation during transmissions. The refcount on most of - * these buffers is artificially bumped up so we can recycle - * them more easily. Also, the destructor is intercepted - * so we use it as an interrupt notification to wake up - * processes blocked on a poll(). - * - * For each receive ring we allocate one "struct mbq" - * (an mbuf tailq plus a spinlock). We intercept packets - * (through if_input) - * on the receive path and put them in the mbq from which - * netmap receive routines can grab them. - * - * TX: - * in the generic_txsync() routine, netmap buffers are copied - * (or linked, in a future) to the preallocated mbufs - * and pushed to the transmit queue. Some of these mbufs - * (those with NS_REPORT, or otherwise every half ring) - * have the refcount=1, others have refcount=2. - * When the destructor is invoked, we take that as - * a notification that all mbufs up to that one in - * the specific ring have been completed, and generate - * the equivalent of a transmit interrupt. - * - * RX: - * - */ - -#ifdef __FreeBSD__ - -#include /* prerequisite */ -__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257666 2013-11-05 01:06:22Z luigi $"); - -#include -#include -#include -#include /* PROT_EXEC */ -#include -#include /* sockaddrs */ -#include -#include -#include -#include /* bus_dmamap_* in netmap_kern.h */ - -// XXX temporary - D() defined here -#include -#include -#include - -#define rtnl_lock() D("rtnl_lock called"); -#define rtnl_unlock() D("rtnl_unlock called"); -#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid) -#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid) -#define smp_mb() - -/* - * mbuf wrappers - */ - -/* - * we allocate an EXT_PACKET - */ -#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE) - -/* mbuf destructor, also need to change the type to EXT_EXTREF, - * add an M_NOFREE flag, and then clear the flag and - * chain into uma_zfree(zone_pack, mf) - * (or reinstall the buffer ?) - */ -#define SET_MBUF_DESTRUCTOR(m, fn) do { \ - (m)->m_ext.ext_free = (void *)fn; \ - (m)->m_ext.ext_type = EXT_EXTREF; \ -} while (0) - - -#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1) - - - -#else /* linux */ - -#include "bsd_glue.h" - -#include /* rtnl_[un]lock() */ -#include /* struct ethtool_ops, get_ringparam */ -#include - -//#define REG_RESET - -#endif /* linux */ - - -/* Common headers. 
*/ -#include -#include -#include - - - -/* ======================== usage stats =========================== */ - -#ifdef RATE_GENERIC -#define IFRATE(x) x -struct rate_stats { - unsigned long txpkt; - unsigned long txsync; - unsigned long txirq; - unsigned long rxpkt; - unsigned long rxirq; - unsigned long rxsync; -}; - -struct rate_context { - unsigned refcount; - struct timer_list timer; - struct rate_stats new; - struct rate_stats old; -}; - -#define RATE_PRINTK(_NAME_) \ - printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD); -#define RATE_PERIOD 2 -static void rate_callback(unsigned long arg) -{ - struct rate_context * ctx = (struct rate_context *)arg; - struct rate_stats cur = ctx->new; - int r; - - RATE_PRINTK(txpkt); - RATE_PRINTK(txsync); - RATE_PRINTK(txirq); - RATE_PRINTK(rxpkt); - RATE_PRINTK(rxsync); - RATE_PRINTK(rxirq); - printk("\n"); - - ctx->old = cur; - r = mod_timer(&ctx->timer, jiffies + - msecs_to_jiffies(RATE_PERIOD * 1000)); - if (unlikely(r)) - D("[v1000] Error: mod_timer()"); -} - -static struct rate_context rate_ctx; - -void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi) -{ - if (txp) rate_ctx.new.txpkt++; - if (txs) rate_ctx.new.txsync++; - if (txi) rate_ctx.new.txirq++; - if (rxp) rate_ctx.new.rxpkt++; - if (rxs) rate_ctx.new.rxsync++; - if (rxi) rate_ctx.new.rxirq++; -} - -#else /* !RATE */ -#define IFRATE(x) -#endif /* !RATE */ - - -/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */ -#define GENERIC_BUF_SIZE netmap_buf_size /* Size of the mbufs in the Tx pool. */ - -/* - * Wrapper used by the generic adapter layer to notify - * the poller threads. Differently from netmap_rx_irq(), we check - * only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq. - */ -static void -netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done) -{ - if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP))) - return; - - netmap_common_irq(ifp, q, work_done); -} - - -/* Enable/disable netmap mode for a generic network interface. */ -static int -generic_netmap_register(struct netmap_adapter *na, int enable) -{ - struct ifnet *ifp = na->ifp; - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; - struct mbuf *m; - int error; - int i, r; - - if (!na) - return EINVAL; - -#ifdef REG_RESET - error = ifp->netdev_ops->ndo_stop(ifp); - if (error) { - return error; - } -#endif /* REG_RESET */ - - if (enable) { /* Enable netmap mode. */ - /* Init the mitigation support on all the rx queues. */ - gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit), - M_DEVBUF, M_NOWAIT | M_ZERO); - if (!gna->mit) { - D("mitigation allocation failed"); - error = ENOMEM; - goto out; - } - for (r=0; rnum_rx_rings; r++) - netmap_mitigation_init(&gna->mit[r], r, na); - - /* Initialize the rx queue, as generic_rx_handler() can - * be called as soon as netmap_catch_rx() returns. - */ - for (r=0; rnum_rx_rings; r++) { - mbq_safe_init(&na->rx_rings[r].rx_queue); - } - - /* - * Preallocate packet buffers for the tx rings. 
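 * One mbuf per tx descriptor is allocated up front; if any allocation
 * fails, the free_tx_pools error path below releases whatever had
 * already been allocated.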
- */ - for (r=0; rnum_tx_rings; r++) - na->tx_rings[r].tx_pool = NULL; - for (r=0; rnum_tx_rings; r++) { - na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *), - M_DEVBUF, M_NOWAIT | M_ZERO); - if (!na->tx_rings[r].tx_pool) { - D("tx_pool allocation failed"); - error = ENOMEM; - goto free_tx_pools; - } - for (i=0; inum_tx_desc; i++) - na->tx_rings[r].tx_pool[i] = NULL; - for (i=0; inum_tx_desc; i++) { - m = netmap_get_mbuf(GENERIC_BUF_SIZE); - if (!m) { - D("tx_pool[%d] allocation failed", i); - error = ENOMEM; - goto free_tx_pools; - } - na->tx_rings[r].tx_pool[i] = m; - } - } - rtnl_lock(); - /* Prepare to intercept incoming traffic. */ - error = netmap_catch_rx(na, 1); - if (error) { - D("netdev_rx_handler_register() failed (%d)", error); - goto register_handler; - } - ifp->if_capenable |= IFCAP_NETMAP; - - /* Make netmap control the packet steering. */ - netmap_catch_tx(gna, 1); - - rtnl_unlock(); - -#ifdef RATE_GENERIC - if (rate_ctx.refcount == 0) { - D("setup_timer()"); - memset(&rate_ctx, 0, sizeof(rate_ctx)); - setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx); - if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) { - D("Error: mod_timer()"); - } - } - rate_ctx.refcount++; -#endif /* RATE */ - - } else if (na->tx_rings[0].tx_pool) { - /* Disable netmap mode. We enter here only if the previous - generic_netmap_register(na, 1) was successfull. - If it was not, na->tx_rings[0].tx_pool was set to NULL by the - error handling code below. */ - rtnl_lock(); - - ifp->if_capenable &= ~IFCAP_NETMAP; - - /* Release packet steering control. */ - netmap_catch_tx(gna, 0); - - /* Do not intercept packets on the rx path. */ - netmap_catch_rx(na, 0); - - rtnl_unlock(); - - /* Free the mbufs going to the netmap rings */ - for (r=0; rnum_rx_rings; r++) { - mbq_safe_purge(&na->rx_rings[r].rx_queue); - mbq_safe_destroy(&na->rx_rings[r].rx_queue); - } - - for (r=0; rnum_rx_rings; r++) - netmap_mitigation_cleanup(&gna->mit[r]); - free(gna->mit, M_DEVBUF); - - for (r=0; rnum_tx_rings; r++) { - for (i=0; inum_tx_desc; i++) { - m_freem(na->tx_rings[r].tx_pool[i]); - } - free(na->tx_rings[r].tx_pool, M_DEVBUF); - } - -#ifdef RATE_GENERIC - if (--rate_ctx.refcount == 0) { - D("del_timer()"); - del_timer(&rate_ctx.timer); - } -#endif - } - -#ifdef REG_RESET - error = ifp->netdev_ops->ndo_open(ifp); - if (error) { - goto free_tx_pools; - } -#endif - - return 0; - -register_handler: - rtnl_unlock(); -free_tx_pools: - for (r=0; rnum_tx_rings; r++) { - if (na->tx_rings[r].tx_pool == NULL) - continue; - for (i=0; inum_tx_desc; i++) - if (na->tx_rings[r].tx_pool[i]) - m_freem(na->tx_rings[r].tx_pool[i]); - free(na->tx_rings[r].tx_pool, M_DEVBUF); - na->tx_rings[r].tx_pool = NULL; - } - for (r=0; rnum_rx_rings; r++) { - netmap_mitigation_cleanup(&gna->mit[r]); - mbq_safe_destroy(&na->rx_rings[r].rx_queue); - } - free(gna->mit, M_DEVBUF); -out: - - return error; -} - -/* - * Callback invoked when the device driver frees an mbuf used - * by netmap to transmit a packet. This usually happens when - * the NIC notifies the driver that transmission is completed. 
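 * Most pooled mbufs keep a refcount of 2, so generic_netmap_tx_clean()
 * detects completion when the refcount drops back to 1; the mbuf selected
 * by generic_set_tx_event() has this destructor installed instead, and the
 * driver's final free of it acts as the emulated tx-completion interrupt.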
- */ -static void -generic_mbuf_destructor(struct mbuf *m) -{ - if (netmap_verbose) - D("Tx irq (%p) queue %d", m, MBUF_TXQ(m)); - netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL); -#ifdef __FreeBSD__ - m->m_ext.ext_type = EXT_PACKET; - m->m_ext.ext_free = NULL; - if (*(m->m_ext.ref_cnt) == 0) - *(m->m_ext.ref_cnt) = 1; - uma_zfree(zone_pack, m); -#endif /* __FreeBSD__ */ - IFRATE(rate_ctx.new.txirq++); -} - -/* Record completed transmissions and update hwtail. - * - * The oldest tx buffer not yet completed is at nr_hwtail + 1, - * nr_hwcur is the first unsent buffer. - */ -static u_int -generic_netmap_tx_clean(struct netmap_kring *kring) -{ - u_int const lim = kring->nkr_num_slots - 1; - u_int nm_i = nm_next(kring->nr_hwtail, lim); - u_int hwcur = kring->nr_hwcur; - u_int n = 0; - struct mbuf **tx_pool = kring->tx_pool; - - while (nm_i != hwcur) { /* buffers not completed */ - struct mbuf *m = tx_pool[nm_i]; - - if (unlikely(m == NULL)) { - /* this is done, try to replenish the entry */ - tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE); - if (unlikely(m == NULL)) { - D("mbuf allocation failed, XXX error"); - // XXX how do we proceed ? break ? - return -ENOMEM; - } - } else if (GET_MBUF_REFCNT(m) != 1) { - break; /* This mbuf is still busy: its refcnt is 2. */ - } - n++; - nm_i = nm_next(nm_i, lim); - } - kring->nr_hwtail = nm_prev(nm_i, lim); - ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail); - - return n; -} - - -/* - * We have pending packets in the driver between nr_hwtail +1 and hwcur. - * Compute a position in the middle, to be used to generate - * a notification. - */ -static inline u_int -generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur) -{ - u_int n = kring->nkr_num_slots; - u_int ntc = nm_next(kring->nr_hwtail, n-1); - u_int e; - - if (hwcur >= ntc) { - e = (hwcur + ntc) / 2; - } else { /* wrap around */ - e = (hwcur + n + ntc) / 2; - if (e >= n) { - e -= n; - } - } - - if (unlikely(e >= n)) { - D("This cannot happen"); - e = 0; - } - - return e; -} - -/* - * We have pending packets in the driver between nr_hwtail+1 and hwcur. - * Schedule a notification approximately in the middle of the two. - * There is a race but this is only called within txsync which does - * a double check. - */ -static void -generic_set_tx_event(struct netmap_kring *kring, u_int hwcur) -{ - struct mbuf *m; - u_int e; - - if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) { - return; /* all buffers are free */ - } - e = generic_tx_event_middle(kring, hwcur); - - m = kring->tx_pool[e]; - if (m == NULL) { - /* This can happen if there is already an event on the netmap - slot 'e': There is nothing to do. */ - return; - } - ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m)); - kring->tx_pool[e] = NULL; - SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor); - - // XXX wmb() ? - /* Decrement the refcount an free it if we have the last one. */ - m_freem(m); - smp_mb(); -} - - -/* - * generic_netmap_txsync() transforms netmap buffers into mbufs - * and passes them to the standard device driver - * (ndo_start_xmit() or ifp->if_transmit() ). - * On linux this is not done directly, but using dev_queue_xmit(), - * since it implements the TX flow control (and takes some locks). 
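 * The routine walks the slots between nr_hwcur and rhead, copies each
 * netmap buffer into a preallocated mbuf from kring->tx_pool and hands it
 * to generic_xmit_frame().  If the device queue fills up it requests a
 * completion event on an earlier mbuf (generic_set_tx_event()) and either
 * continues after a successful reclaim or stops; hwtail is then updated
 * by generic_netmap_tx_clean().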
- */ -static int -generic_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ // j - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - u_int ring_nr = kring->ring_id; - - IFRATE(rate_ctx.new.txsync++); - - // TODO: handle the case of mbuf allocation failure - - rmb(); - - /* - * First part: process new packets to send. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - while (nm_i != head) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - void *addr = NMB(slot); - - /* device-specific */ - struct mbuf *m; - int tx_ret; - - NM_CHECK_ADDR_LEN(addr, len); - - /* Tale a mbuf from the tx pool and copy in the user packet. */ - m = kring->tx_pool[nm_i]; - if (unlikely(!m)) { - RD(5, "This should never happen"); - kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE); - if (unlikely(m == NULL)) { - D("mbuf allocation failed"); - break; - } - } - /* XXX we should ask notifications when NS_REPORT is set, - * or roughly every half frame. We can optimize this - * by lazily requesting notifications only when a - * transmission fails. Probably the best way is to - * break on failures and set notifications when - * ring->cur == ring->tail || nm_i != cur - */ - tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr); - if (unlikely(tx_ret)) { - RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]", - tx_ret, nm_i, head, kring->nr_hwtail); - /* - * No room for this mbuf in the device driver. - * Request a notification FOR A PREVIOUS MBUF, - * then call generic_netmap_tx_clean(kring) to do the - * double check and see if we can free more buffers. - * If there is space continue, else break; - * NOTE: the double check is necessary if the problem - * occurs in the txsync call after selrecord(). - * Also, we need some way to tell the caller that not - * all buffers were queued onto the device (this was - * not a problem with native netmap driver where space - * is preallocated). The bridge has a similar problem - * and we solve it there by dropping the excess packets. - */ - generic_set_tx_event(kring, nm_i); - if (generic_netmap_tx_clean(kring)) { /* space now available */ - continue; - } else { - break; - } - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - nm_i = nm_next(nm_i, lim); - IFRATE(rate_ctx.new.txpkt ++); - } - - /* Update hwcur to the next slot to transmit. */ - kring->nr_hwcur = nm_i; /* not head, we could break early */ - } - - /* - * Second, reclaim completed buffers - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - /* No more available slots? Set a notification event - * on a netmap slot that will be cleaned in the future. - * No doublecheck is performed, since txsync() will be - * called twice by netmap_poll(). - */ - generic_set_tx_event(kring, nm_i); - } - ND("tx #%d, hwtail = %d", n, kring->nr_hwtail); - - generic_netmap_tx_clean(kring); - - nm_txsync_finalize(kring); - - return 0; -} - - -/* - * This handler is registered (through netmap_catch_rx()) - * within the attached network interface - * in the RX subsystem, so that every mbuf passed up by - * the driver can be stolen to the network stack. - * Stolen packets are put in a queue where the - * generic_netmap_rxsync() callback can extract them. 
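 * The per-ring queue is bounded (mbufs are dropped beyond roughly 1024
 * entries) and, depending on netmap_generic_mit, the notification to the
 * poller is either delivered immediately or coalesced through the rx
 * mitigation timer.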
- */ -void -generic_rx_handler(struct ifnet *ifp, struct mbuf *m) -{ - struct netmap_adapter *na = NA(ifp); - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; - u_int work_done; - u_int rr = MBUF_RXQ(m); // receive ring number - - if (rr >= na->num_rx_rings) { - rr = rr % na->num_rx_rings; // XXX expensive... - } - - /* limit the size of the queue */ - if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) { - m_freem(m); - } else { - mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m); - } - - if (netmap_generic_mit < 32768) { - /* no rx mitigation, pass notification up */ - netmap_generic_irq(na->ifp, rr, &work_done); - IFRATE(rate_ctx.new.rxirq++); - } else { - /* same as send combining, filter notification if there is a - * pending timer, otherwise pass it up and start a timer. - */ - if (likely(netmap_mitigation_active(&gna->mit[rr]))) { - /* Record that there is some pending work. */ - gna->mit[rr].mit_pending = 1; - } else { - netmap_generic_irq(na->ifp, rr, &work_done); - IFRATE(rate_ctx.new.rxirq++); - netmap_mitigation_start(&gna->mit[rr]); - } - } -} - -/* - * generic_netmap_rxsync() extracts mbufs from the queue filled by - * generic_netmap_rx_handler() and puts their content in the netmap - * receive ring. - * Access must be protected because the rx handler is asynchronous, - */ -static int -generic_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ //j, - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - if (head > lim) - return netmap_ring_reinit(kring); - - /* - * First part: import newly received packets. - */ - if (netmap_no_pendintr || force_update) { - /* extract buffers from the rx queue, stop at most one - * slot before nr_hwcur (stop_i) - */ - uint16_t slot_flags = kring->nkr_slot_flags; - u_int stop_i = nm_prev(kring->nr_hwcur, lim); - - nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */ - for (n = 0; nm_i != stop_i; n++) { - int len; - void *addr = NMB(&ring->slot[nm_i]); - struct mbuf *m; - - /* we only check the address here on generic rx rings */ - if (addr == netmap_buffer_base) { /* Bad buffer */ - return netmap_ring_reinit(kring); - } - /* - * Call the locked version of the function. - * XXX Ideally we could grab a batch of mbufs at once - * and save some locking overhead. - */ - m = mbq_safe_dequeue(&kring->rx_queue); - if (!m) /* no more data */ - break; - len = MBUF_LEN(m); - m_copydata(m, 0, len, addr); - ring->slot[nm_i].len = len; - ring->slot[nm_i].flags = slot_flags; - m_freem(m); - nm_i = nm_next(nm_i, lim); - } - if (n) { - kring->nr_hwtail = nm_i; - IFRATE(rate_ctx.new.rxpkt += n); - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - // XXX should we invert the order ? - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - /* Userspace has released some packets. */ - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - - slot->flags &= ~NS_BUF_CHANGED; - nm_i = nm_next(nm_i, lim); - } - kring->nr_hwcur = head; - } - /* tell userspace that there might be new packets. 
*/ - nm_rxsync_finalize(kring); - IFRATE(rate_ctx.new.rxsync++); - - return 0; -} - -static void -generic_netmap_dtor(struct netmap_adapter *na) -{ - struct ifnet *ifp = na->ifp; - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na; - struct netmap_adapter *prev_na = gna->prev; - - if (prev_na != NULL) { - D("Released generic NA %p", gna); - if_rele(na->ifp); - netmap_adapter_put(prev_na); - } - if (ifp != NULL) { - WNA(ifp) = prev_na; - D("Restored native NA %p", prev_na); - na->ifp = NULL; - } -} - -/* - * generic_netmap_attach() makes it possible to use netmap on - * a device without native netmap support. - * This is less performant than native support but potentially - * faster than raw sockets or similar schemes. - * - * In this "emulated" mode, netmap rings do not necessarily - * have the same size as those in the NIC. We use a default - * value and possibly override it if the OS has ways to fetch the - * actual configuration. - */ -int -generic_netmap_attach(struct ifnet *ifp) -{ - struct netmap_adapter *na; - struct netmap_generic_adapter *gna; - int retval; - u_int num_tx_desc, num_rx_desc; - - num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ - - generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); - ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc); - if (num_tx_desc == 0 || num_rx_desc == 0) { - D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc); - return EINVAL; - } - - gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO); - if (gna == NULL) { - D("no memory on attach, give up"); - return ENOMEM; - } - na = (struct netmap_adapter *)gna; - na->ifp = ifp; - na->num_tx_desc = num_tx_desc; - na->num_rx_desc = num_rx_desc; - na->nm_register = &generic_netmap_register; - na->nm_txsync = &generic_netmap_txsync; - na->nm_rxsync = &generic_netmap_rxsync; - na->nm_dtor = &generic_netmap_dtor; - /* when using generic, IFCAP_NETMAP is set so we force - * NAF_SKIP_INTR to use the regular interrupt handler - */ - na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS; - - ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)", - ifp->num_tx_queues, ifp->real_num_tx_queues, - ifp->tx_queue_len); - ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)", - ifp->num_rx_queues, ifp->real_num_rx_queues); - - generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings); - - retval = netmap_attach_common(na); - if (retval) { - free(gna, M_DEVBUF); - } - - return retval; -} diff --git a/netmap/sys/dev/netmap/netmap_kern.h b/netmap/sys/dev/netmap/netmap_kern.h deleted file mode 100644 index fd84e70..0000000 --- a/netmap/sys/dev/netmap/netmap_kern.h +++ /dev/null @@ -1,1396 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/dev/netmap/netmap_kern.h 238985 2012-08-02 11:59:43Z luigi $ - * - * The header contains the definitions of constants and function - * prototypes used only in kernelspace. - */ - -#ifndef _NET_NETMAP_KERN_H_ -#define _NET_NETMAP_KERN_H_ - -#define WITH_VALE // comment out to disable VALE support -#define WITH_PIPES - -#if defined(__FreeBSD__) - -#define likely(x) __builtin_expect((long)!!(x), 1L) -#define unlikely(x) __builtin_expect((long)!!(x), 0L) - -#define NM_LOCK_T struct mtx -#define NMG_LOCK_T struct mtx -#define NMG_LOCK_INIT() mtx_init(&netmap_global_lock, \ - "netmap global lock", NULL, MTX_DEF) -#define NMG_LOCK_DESTROY() mtx_destroy(&netmap_global_lock) -#define NMG_LOCK() mtx_lock(&netmap_global_lock) -#define NMG_UNLOCK() mtx_unlock(&netmap_global_lock) -#define NMG_LOCK_ASSERT() mtx_assert(&netmap_global_lock, MA_OWNED) - -#define NM_SELINFO_T struct selinfo -#define MBUF_LEN(m) ((m)->m_pkthdr.len) -#define MBUF_IFP(m) ((m)->m_pkthdr.rcvif) -#define NM_SEND_UP(ifp, m) ((NA(ifp))->if_input)(ifp, m) - -#define NM_ATOMIC_T volatile int // XXX ? -/* atomic operations */ -#include -#define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1)) -#define NM_ATOMIC_CLEAR(p) atomic_store_rel_int((p), 0) - - -MALLOC_DECLARE(M_NETMAP); - -// XXX linux struct, not used in FreeBSD -struct net_device_ops { -}; -struct ethtool_ops { -}; -struct hrtimer { -}; - -#elif defined (linux) - -#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h -#define NM_SELINFO_T wait_queue_head_t -#define MBUF_LEN(m) ((m)->len) -#define MBUF_IFP(m) ((m)->dev) -#define NM_SEND_UP(ifp, m) \ - do { \ - m->priority = NM_MAGIC_PRIORITY_RX; \ - netif_rx(m); \ - } while (0) - -#define NM_ATOMIC_T volatile long unsigned int - -// XXX a mtx would suffice here too 20130404 gl -#define NMG_LOCK_T struct semaphore -#define NMG_LOCK_INIT() sema_init(&netmap_global_lock, 1) -#define NMG_LOCK_DESTROY() -#define NMG_LOCK() down(&netmap_global_lock) -#define NMG_UNLOCK() up(&netmap_global_lock) -#define NMG_LOCK_ASSERT() // XXX to be completed - -#ifndef DEV_NETMAP -#define DEV_NETMAP -#endif /* DEV_NETMAP */ - -/* - * IFCAP_NETMAP goes into net_device's priv_flags (if_capenable). - * This was 16 bits up to linux 2.6.36, so we need a 16 bit value on older - * platforms and tolerate the clash with IFF_DYNAMIC and IFF_BRIDGE_PORT. - * For the 32-bit value, 0x100000 has no clashes until at least 3.5.1 - */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) -#define IFCAP_NETMAP 0x8000 -#else -#define IFCAP_NETMAP 0x200000 -#endif - -#elif defined (__APPLE__) - -#warning apple support is incomplete. 
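/* Only part of the glue is defined below for this platform: the
 * NMG_LOCK_* and NM_ATOMIC_* definitions provided above for FreeBSD
 * and Linux are still missing here. */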
-#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) -#define NM_LOCK_T IOLock * -#define NM_SELINFO_T struct selinfo -#define MBUF_LEN(m) ((m)->m_pkthdr.len) -#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m) - -#else - -#error unsupported platform - -#endif /* end - platform-specific code */ - -#define ND(format, ...) -#define D(format, ...) \ - do { \ - struct timeval __xxts; \ - microtime(&__xxts); \ - printf("%03d.%06d [%4d] %-25s " format "\n", \ - (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \ - __LINE__, __FUNCTION__, ##__VA_ARGS__); \ - } while (0) - -/* rate limited, lps indicates how many per second */ -#define RD(lps, format, ...) \ - do { \ - static int t0, __cnt; \ - if (t0 != time_second) { \ - t0 = time_second; \ - __cnt = 0; \ - } \ - if (__cnt++ < lps) \ - D(format, ##__VA_ARGS__); \ - } while (0) - -struct netmap_adapter; -struct nm_bdg_fwd; -struct nm_bridge; -struct netmap_priv_d; - -const char *nm_dump_buf(char *p, int len, int lim, char *dst); - -#include "netmap_mbq.h" - -extern NMG_LOCK_T netmap_global_lock; - -/* - * private, kernel view of a ring. Keeps track of the status of - * a ring across system calls. - * - * nr_hwcur index of the next buffer to refill. - * It corresponds to ring->head - * at the time the system call returns. - * - * nr_hwtail index of the first buffer owned by the kernel. - * On RX, hwcur->hwtail are receive buffers - * not yet released. hwcur is advanced following - * ring->head, hwtail is advanced on incoming packets, - * and a wakeup is generated when hwtail passes ring->cur - * On TX, hwcur->rcur have been filled by the sender - * but not sent yet to the NIC; rcur->hwtail are available - * for new transmissions, and hwtail->hwcur-1 are pending - * transmissions not yet acknowledged. - * - * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots. - * This is so that, on a reset, buffers owned by userspace are not - * modified by the kernel. In particular: - * RX rings: the next empty buffer (hwtail + hwofs) coincides with - * the next empty buffer as known by the hardware (next_to_check or so). - * TX rings: hwcur + hwofs coincides with next_to_send - * - * Clients cannot issue concurrent syscall on a ring. The system - * detects this and reports an error using two flags, - * NKR_WBUSY and NKR_RBUSY - * For received packets, slot->flags is set to nkr_slot_flags - * so we can provide a proper initial value (e.g. set NS_FORWARD - * when operating in 'transparent' mode). - * - * The following fields are used to implement lock-free copy of packets - * from input to output ports in VALE switch: - * nkr_hwlease buffer after the last one being copied. - * A writer in nm_bdg_flush reserves N buffers - * from nr_hwlease, advances it, then does the - * copy outside the lock. - * In RX rings (used for VALE ports), - * nkr_hwtail <= nkr_hwlease < nkr_hwcur+N-1 - * In TX rings (used for NIC or host stack ports) - * nkr_hwcur <= nkr_hwlease < nkr_hwtail - * nkr_leases array of nkr_num_slots where writers can report - * completion of their block. NR_NOSLOT (~0) indicates - * that the writer has not finished yet - * nkr_lease_idx index of next free slot in nr_leases, to be assigned - * - * The kring is manipulated by txsync/rxsync and generic netmap function. - * - * Concurrent rxsync or txsync on the same ring are prevented through - * by nm_kr_lock() which in turn uses nr_busy. This is all we need - * for NIC rings, and for TX rings attached to the host stack. 
- * - * RX rings attached to the host stack use an mbq (rx_queue) on both - * rxsync_from_host() and netmap_transmit(). The mbq is protected - * by its internal lock. - * - * RX rings attached to the VALE switch are accessed by both sender - * and receiver. They are protected through the q_lock on the RX ring. - */ -struct netmap_kring { - struct netmap_ring *ring; - - uint32_t nr_hwcur; - uint32_t nr_hwtail; - - /* - * Copies of values in user rings, so we do not need to look - * at the ring (which could be modified). These are set in the - * *sync_prologue()/finalize() routines. - */ - uint32_t rhead; - uint32_t rcur; - uint32_t rtail; - - uint32_t nr_kflags; /* private driver flags */ -#define NKR_PENDINTR 0x1 // Pending interrupt. - uint32_t nkr_num_slots; - - /* - * On a NIC reset, the NIC ring indexes may be reset but the - * indexes in the netmap rings remain the same. nkr_hwofs - * keeps track of the offset between the two. - */ - int32_t nkr_hwofs; - - uint16_t nkr_slot_flags; /* initial value for flags */ - - /* last_reclaim is opaque marker to help reduce the frequency - * of operations such as reclaiming tx buffers. A possible use - * is set it to ticks and do the reclaim only once per tick. - */ - uint64_t last_reclaim; - - - NM_SELINFO_T si; /* poll/select wait queue */ - NM_LOCK_T q_lock; /* protects kring and ring. */ - NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */ - - struct netmap_adapter *na; - - /* The folloiwing fields are for VALE switch support */ - struct nm_bdg_fwd *nkr_ft; - uint32_t *nkr_leases; -#define NR_NOSLOT ((uint32_t)~0) /* used in nkr_*lease* */ - uint32_t nkr_hwlease; - uint32_t nkr_lease_idx; - - volatile int nkr_stopped; // XXX what for ? - - /* Support for adapters without native netmap support. - * On tx rings we preallocate an array of tx buffers - * (same size as the netmap ring), on rx rings we - * store incoming mbufs in a queue that is drained by - * a rxsync. - */ - struct mbuf **tx_pool; - // u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */ - struct mbq rx_queue; /* intercepted rx mbufs. */ - - uint32_t ring_id; /* debugging */ - char name[64]; /* diagnostic */ - - int (*nm_sync)(struct netmap_kring *kring, int flags); - -#ifdef WITH_PIPES - struct netmap_kring *pipe; - struct netmap_ring *save_ring; -#endif /* WITH_PIPES */ - -} __attribute__((__aligned__(64))); - - -/* return the next index, with wraparound */ -static inline uint32_t -nm_next(uint32_t i, uint32_t lim) -{ - return unlikely (i == lim) ? 0 : i + 1; -} - - -/* return the previous index, with wraparound */ -static inline uint32_t -nm_prev(uint32_t i, uint32_t lim) -{ - return unlikely (i == 0) ? lim : i - 1; -} - - -/* - * - * Here is the layout for the Rx and Tx rings. - - RxRING TxRING - - +-----------------+ +-----------------+ - | | | | - |XXX free slot XXX| |XXX free slot XXX| - +-----------------+ +-----------------+ -head->| owned by user |<-hwcur | not sent to nic |<-hwcur - | | | yet | - +-----------------+ | | - cur->| available to | | | - | user, not read | +-----------------+ - | yet | cur->| (being | - | | | prepared) | - | | | | - +-----------------+ + ------ + -tail->| |<-hwtail | |<-hwlease - | (being | ... | | ... - | prepared) | ... | | ... - +-----------------+ ... | | ... - | |<-hwlease +-----------------+ - | | tail->| |<-hwtail - | | | | - | | | | - | | | | - +-----------------+ +-----------------+ - - * The cur/tail (user view) and hwcur/hwtail (kernel view) - * are used in the normal operation of the card. 
- * - * When a ring is the output of a switch port (Rx ring for - * a VALE port, Tx ring for the host stack or NIC), slots - * are reserved in blocks through 'hwlease' which points - * to the next unused slot. - * On an Rx ring, hwlease is always after hwtail, - * and completions cause hwtail to advance. - * On a Tx ring, hwlease is always between cur and hwtail, - * and completions cause cur to advance. - * - * nm_kr_space() returns the maximum number of slots that - * can be assigned. - * nm_kr_lease() reserves the required number of buffers, - * advances nkr_hwlease and also returns an entry in - * a circular array where completions should be reported. - */ - - - -enum txrx { NR_RX = 0, NR_TX = 1 }; - -/* - * The "struct netmap_adapter" extends the "struct adapter" - * (or equivalent) device descriptor. - * It contains all base fields needed to support netmap operation. - * There are in fact different types of netmap adapters - * (native, generic, VALE switch...) so a netmap_adapter is - * just the first field in the derived type. - */ -struct netmap_adapter { - /* - * On linux we do not have a good way to tell if an interface - * is netmap-capable. So we always use the following trick: - * NA(ifp) points here, and the first entry (which hopefully - * always exists and is at least 32 bits) contains a magic - * value which we can use to detect that the interface is good. - */ - uint32_t magic; - uint32_t na_flags; /* enabled, and other flags */ -#define NAF_SKIP_INTR 1 /* use the regular interrupt handler. - * useful during initialization - */ -#define NAF_SW_ONLY 2 /* forward packets only to sw adapter */ -#define NAF_BDG_MAYSLEEP 4 /* the bridge is allowed to sleep when - * forwarding packets coming from this - * interface - */ -#define NAF_MEM_OWNER 8 /* the adapter is responsible for the - * deallocation of the memory allocator - */ -#define NAF_NATIVE_ON 16 /* the adapter is native and the attached - * interface is in netmap mode - */ -#define NAF_NETMAP_ON 32 /* netmap is active (either native or - * emulated. Where possible (e.g. FreeBSD) - * IFCAP_NETMAP also mirrors this flag. - */ -#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */ - int active_fds; /* number of user-space descriptors using this - interface, which is equal to the number of - struct netmap_if objs in the mapped region. */ - - u_int num_rx_rings; /* number of adapter receive rings */ - u_int num_tx_rings; /* number of adapter transmit rings */ - - u_int num_tx_desc; /* number of descriptor in each queue */ - u_int num_rx_desc; - - /* tx_rings and rx_rings are private but allocated - * as a contiguous chunk of memory. Each array has - * N+1 entries, for the adapter queues and for the host queue. - */ - struct netmap_kring *tx_rings; /* array of TX rings. */ - struct netmap_kring *rx_rings; /* array of RX rings. */ - - void *tailroom; /* space below the rings array */ - /* (used for leases) */ - - - NM_SELINFO_T tx_si, rx_si; /* global wait queues */ - - /* count users of the global wait queues */ - int tx_si_users, rx_si_users; - - /* copy of if_qflush and if_transmit pointers, to intercept - * packets from the network stack when netmap is active. - */ - int (*if_transmit)(struct ifnet *, struct mbuf *); - - /* copy of if_input for netmap_send_up() */ - void (*if_input)(struct ifnet *, struct mbuf *); - - /* references to the ifnet and device routines, used by - * the generic netmap functions. 
- */ - struct ifnet *ifp; /* adapter is ifp->if_softc */ - - /*---- callbacks for this netmap adapter -----*/ - /* - * nm_dtor() is the cleanup routine called when destroying - * the adapter. - * - * nm_register() is called on NIOCREGIF and close() to enter - * or exit netmap mode on the NIC - * - * nm_txsync() pushes packets to the underlying hw/switch - * - * nm_rxsync() collects packets from the underlying hw/switch - * - * nm_config() returns configuration information from the OS - * - * nm_krings_create() create and init the krings array - * (the array layout must conform to the description - * found above the definition of netmap_krings_create) - * - * nm_krings_delete() cleanup and delete the kring array - * - * nm_notify() is used to act after data have become available. - * For hw devices this is typically a selwakeup(), - * but for NIC/host ports attached to a switch (or vice-versa) - * we also need to invoke the 'txsync' code downstream. - */ - - /* private cleanup */ - void (*nm_dtor)(struct netmap_adapter *); - - int (*nm_register)(struct netmap_adapter *, int onoff); - - int (*nm_txsync)(struct netmap_kring *kring, int flags); - int (*nm_rxsync)(struct netmap_kring *kring, int flags); -#define NAF_FORCE_READ 1 -#define NAF_FORCE_RECLAIM 2 - /* return configuration information */ - int (*nm_config)(struct netmap_adapter *, - u_int *txr, u_int *txd, u_int *rxr, u_int *rxd); - int (*nm_krings_create)(struct netmap_adapter *); - void (*nm_krings_delete)(struct netmap_adapter *); - int (*nm_notify)(struct netmap_adapter *, - u_int ring, enum txrx, int flags); -#define NAF_DISABLE_NOTIFY 8 - - /* standard refcount to control the lifetime of the adapter - * (it should be equal to the lifetime of the corresponding ifp) - */ - int na_refcount; - - /* memory allocator (opaque) - * We also cache a pointer to the lut_entry for translating - * buffer addresses, and the total number of buffers. - */ - struct netmap_mem_d *nm_mem; - struct lut_entry *na_lut; - uint32_t na_lut_objtotal; /* max buffer index */ - - /* used internally. If non-null, the interface cannot be bound - * from userspace - */ - void *na_private; - -#ifdef WITH_PIPES - struct netmap_pipe_adapter **na_pipes; - int na_next_pipe; - int na_max_pipes; -#endif /* WITH_PIPES */ -}; - - -/* - * If the NIC is owned by the kernel - * (i.e., bridge), neither another bridge nor user can use it; - * if the NIC is owned by a user, only users can share it. - * Evaluation must be done under NMG_LOCK(). - */ -#define NETMAP_OWNED_BY_KERN(na) (na->na_private) -#define NETMAP_OWNED_BY_ANY(na) \ - (NETMAP_OWNED_BY_KERN(na) || (na->active_fds > 0)) - - -/* - * derived netmap adapters for various types of ports - */ -struct netmap_vp_adapter { /* VALE software port */ - struct netmap_adapter up; - - /* - * Bridge support: - * - * bdg_port is the port number used in the bridge; - * na_bdg points to the bridge this NA is attached to. - */ - int bdg_port; - struct nm_bridge *na_bdg; - int retry; - - /* Offset of ethernet header for each packet. */ - u_int virt_hdr_len; - /* Maximum Frame Size, used in bdg_mismatch_datapath() */ - u_int mfs; -}; - - -struct netmap_hw_adapter { /* physical device */ - struct netmap_adapter up; - - struct net_device_ops nm_ndo; // XXX linux only - struct ethtool_ops nm_eto; // XXX linux only - const struct ethtool_ops* save_ethtool; -}; - -/* Mitigation support. 
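 * One of these structures is allocated per rx ring of an emulated
 * adapter: mit_timer coalesces rx notifications, and mit_pending records
 * that more packets arrived while the timer was still running.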
*/ -struct nm_generic_mit { - struct hrtimer mit_timer; - int mit_pending; - int mit_ring_idx; /* index of the ring being mitigated */ - struct netmap_adapter *mit_na; /* backpointer */ -}; - -struct netmap_generic_adapter { /* emulated device */ - struct netmap_hw_adapter up; - - /* Pointer to a previously used netmap adapter. */ - struct netmap_adapter *prev; - - /* generic netmap adapters support: - * a net_device_ops struct overrides ndo_select_queue(), - * save_if_input saves the if_input hook (FreeBSD), - * mit implements rx interrupt mitigation, - */ - struct net_device_ops generic_ndo; - void (*save_if_input)(struct ifnet *, struct mbuf *); - - struct nm_generic_mit *mit; -#ifdef linux - netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *); -#endif -}; - -static __inline int -netmap_real_tx_rings(struct netmap_adapter *na) -{ - return na->num_tx_rings + !!(na->na_flags & NAF_HOST_RINGS); -} - -static __inline int -netmap_real_rx_rings(struct netmap_adapter *na) -{ - return na->num_rx_rings + !!(na->na_flags & NAF_HOST_RINGS); -} - -#ifdef WITH_VALE - -/* - * Bridge wrapper for non VALE ports attached to a VALE switch. - * - * The real device must already have its own netmap adapter (hwna). - * The bridge wrapper and the hwna adapter share the same set of - * netmap rings and buffers, but they have two separate sets of - * krings descriptors, with tx/rx meanings swapped: - * - * netmap - * bwrap krings rings krings hwna - * +------+ +------+ +-----+ +------+ +------+ - * |tx_rings->| |\ /| |----| |<-tx_rings| - * | | +------+ \ / +-----+ +------+ | | - * | | X | | - * | | / \ | | - * | | +------+/ \+-----+ +------+ | | - * |rx_rings->| | | |----| |<-rx_rings| - * | | +------+ +-----+ +------+ | | - * +------+ +------+ - * - * - packets coming from the bridge go to the brwap rx rings, - * which are also the hwna tx rings. The bwrap notify callback - * will then complete the hwna tx (see netmap_bwrap_notify). - * - * - packets coming from the outside go to the hwna rx rings, - * which are also the bwrap tx rings. The (overwritten) hwna - * notify method will then complete the bridge tx - * (see netmap_bwrap_intr_notify). - * - * The bridge wrapper may optionally connect the hwna 'host' rings - * to the bridge. This is done by using a second port in the - * bridge and connecting it to the 'host' netmap_vp_adapter - * contained in the netmap_bwrap_adapter. The brwap host adapter - * cross-links the hwna host rings in the same way as shown above. - * - * - packets coming from the bridge and directed to the host stack - * are handled by the bwrap host notify callback - * (see netmap_bwrap_host_notify) - * - * - packets coming from the host stack are still handled by the - * overwritten hwna notify callback (netmap_bwrap_intr_notify), - * but are diverted to the host adapter depending on the ring number. - * - */ -struct netmap_bwrap_adapter { - struct netmap_vp_adapter up; - struct netmap_vp_adapter host; /* for host rings */ - struct netmap_adapter *hwna; /* the underlying device */ - - /* backup of the hwna notify callback */ - int (*save_notify)(struct netmap_adapter *, - u_int ring, enum txrx, int flags); - - /* - * When we attach a physical interface to the bridge, we - * allow the controlling process to terminate, so we need - * a place to store the netmap_priv_d data structure. - * This is only done when physical interfaces - * are attached to a bridge. 
- */ - struct netmap_priv_d *na_kpriv; -}; - - -#endif /* WITH_VALE */ - -#ifdef WITH_PIPES - -#define NM_MAXPIPES 64 /* max number of pipes per adapter */ - -struct netmap_pipe_adapter { - struct netmap_adapter up; - - u_int id; /* pipe identifier */ - int role; /* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */ - - struct netmap_adapter *parent; /* adapter that owns the memory */ - struct netmap_pipe_adapter *peer; /* the other end of the pipe */ - int peer_ref; /* 1 iff we are holding a ref to the peer */ - - u_int parent_slot; /* index in the parent pipe array */ -}; - -#endif /* WITH_PIPES */ - - -/* return slots reserved to rx clients; used in drivers */ -static inline uint32_t -nm_kr_rxspace(struct netmap_kring *k) -{ - int space = k->nr_hwtail - k->nr_hwcur; - if (space < 0) - space += k->nkr_num_slots; - ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail); - - return space; -} - - -/* True if no space in the tx ring. only valid after txsync_prologue */ -static inline int -nm_kr_txempty(struct netmap_kring *kring) -{ - return kring->rcur == kring->nr_hwtail; -} - - -/* - * protect against multiple threads using the same ring. - * also check that the ring has not been stopped. - * We only care for 0 or !=0 as a return code. - */ -#define NM_KR_BUSY 1 -#define NM_KR_STOPPED 2 - - -static __inline void nm_kr_put(struct netmap_kring *kr) -{ - NM_ATOMIC_CLEAR(&kr->nr_busy); -} - - -static __inline int nm_kr_tryget(struct netmap_kring *kr) -{ - /* check a first time without taking the lock - * to avoid starvation for nm_kr_get() - */ - if (unlikely(kr->nkr_stopped)) { - ND("ring %p stopped (%d)", kr, kr->nkr_stopped); - return NM_KR_STOPPED; - } - if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))) - return NM_KR_BUSY; - /* check a second time with lock held */ - if (unlikely(kr->nkr_stopped)) { - ND("ring %p stopped (%d)", kr, kr->nkr_stopped); - nm_kr_put(kr); - return NM_KR_STOPPED; - } - return 0; -} - - -/* - * The following functions are used by individual drivers to - * support netmap operation. - * - * netmap_attach() initializes a struct netmap_adapter, allocating the - * struct netmap_ring's and the struct selinfo. - * - * netmap_detach() frees the memory allocated by netmap_attach(). - * - * netmap_transmit() replaces the if_transmit routine of the interface, - * and is used to intercept packets coming from the stack. - * - * netmap_load_map/netmap_reload_map are helper routines to set/reset - * the dmamap for a packet buffer - * - * netmap_reset() is a helper routine to be called in the driver - * when reinitializing a ring. 
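 * A native driver therefore fills a struct netmap_adapter with its ifp,
 * ring/descriptor counts and nm_txsync/nm_rxsync/nm_register callbacks,
 * calls netmap_attach() at attach time and netmap_detach() on detach,
 * and invokes netmap_reset() from its ring-initialization path.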
- */ -int netmap_attach(struct netmap_adapter *); -int netmap_attach_common(struct netmap_adapter *); -void netmap_detach_common(struct netmap_adapter *na); -void netmap_detach(struct ifnet *); -int netmap_transmit(struct ifnet *, struct mbuf *); -struct netmap_slot *netmap_reset(struct netmap_adapter *na, - enum txrx tx, u_int n, u_int new_cur); -int netmap_ring_reinit(struct netmap_kring *); - -/* default functions to handle rx/tx interrupts */ -int netmap_rx_irq(struct ifnet *, u_int, u_int *); -#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL) -void netmap_common_irq(struct ifnet *, u_int, u_int *work_done); - -void netmap_disable_all_rings(struct ifnet *); -void netmap_enable_all_rings(struct ifnet *); -void netmap_disable_ring(struct netmap_kring *kr); - - -/* set/clear native flags and if_transmit/netdev_ops */ -static inline void -nm_set_native_flags(struct netmap_adapter *na) -{ - struct ifnet *ifp = na->ifp; - - na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON); -#ifdef IFCAP_NETMAP /* or FreeBSD ? */ - ifp->if_capenable |= IFCAP_NETMAP; -#endif -#ifdef __FreeBSD__ - na->if_transmit = ifp->if_transmit; - ifp->if_transmit = netmap_transmit; -#else - na->if_transmit = (void *)ifp->netdev_ops; - ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo; - ((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops; - SET_ETHTOOL_OPS(ifp, &((struct netmap_hw_adapter*)na)->nm_eto); -#endif -} - - -static inline void -nm_clear_native_flags(struct netmap_adapter *na) -{ - struct ifnet *ifp = na->ifp; - -#ifdef __FreeBSD__ - ifp->if_transmit = na->if_transmit; -#else - ifp->netdev_ops = (void *)na->if_transmit; - SET_ETHTOOL_OPS(ifp, ((struct netmap_hw_adapter*)na)->save_ethtool); -#endif - na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON); -#ifdef IFCAP_NETMAP /* or FreeBSD ? */ - ifp->if_capenable &= ~IFCAP_NETMAP; -#endif -} - - -/* - * validates parameters in the ring/kring, returns a value for head - * If any error, returns ring_size to force a reinit. - */ -uint32_t nm_txsync_prologue(struct netmap_kring *); - - -/* - * validates parameters in the ring/kring, returns a value for head, - * and the 'reserved' value in the argument. - * If any error, returns ring_size lim to force a reinit. - */ -uint32_t nm_rxsync_prologue(struct netmap_kring *); - - -/* - * update kring and ring at the end of txsync. 
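 * i.e. publish nr_hwtail into the user-visible ring->tail and cache it
 * in rtail for the next prologue.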
- */ -static inline void -nm_txsync_finalize(struct netmap_kring *kring) -{ - /* update ring tail to what the kernel knows */ - kring->ring->tail = kring->rtail = kring->nr_hwtail; - - /* note, head/rhead/hwcur might be behind cur/rcur - * if no carrier - */ - ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d", - kring->name, kring->nr_hwcur, kring->nr_hwtail, - kring->rhead, kring->rcur, kring->rtail); -} - - -/* - * update kring and ring at the end of rxsync - */ -static inline void -nm_rxsync_finalize(struct netmap_kring *kring) -{ - /* tell userspace that there might be new packets */ - //struct netmap_ring *ring = kring->ring; - ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail, - kring->nr_hwtail); - kring->ring->tail = kring->rtail = kring->nr_hwtail; - /* make a copy of the state for next round */ - kring->rhead = kring->ring->head; - kring->rcur = kring->ring->cur; -} - - -/* check/fix address and len in tx rings */ -#if 1 /* debug version */ -#define NM_CHECK_ADDR_LEN(_a, _l) do { \ - if (_a == netmap_buffer_base || _l > NETMAP_BUF_SIZE) { \ - RD(5, "bad addr/len ring %d slot %d idx %d len %d", \ - kring->ring_id, nm_i, slot->buf_idx, len); \ - if (_l > NETMAP_BUF_SIZE) \ - _l = NETMAP_BUF_SIZE; \ - } } while (0) -#else /* no debug version */ -#define NM_CHECK_ADDR_LEN(_a, _l) do { \ - if (_l > NETMAP_BUF_SIZE) \ - _l = NETMAP_BUF_SIZE; \ - } while (0) -#endif - - -/*---------------------------------------------------------------*/ -/* - * Support routines to be used with the VALE switch - */ -int netmap_update_config(struct netmap_adapter *na); -int netmap_krings_create(struct netmap_adapter *na, u_int tailroom); -void netmap_krings_delete(struct netmap_adapter *na); -int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait); - - -struct netmap_if * -netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, - uint16_t ringid, uint32_t flags, int *err); - - - -u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg); -int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create); -int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na); - - -#ifdef WITH_VALE -/* - * The following bridge-related functions are used by other - * kernel modules. - * - * VALE only supports unicast or broadcast. The lookup - * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports, - * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown. - * XXX in practice "unknown" might be handled same as broadcast. - */ -typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len, - uint8_t *ring_nr, struct netmap_vp_adapter *); -u_int netmap_bdg_learning(char *, u_int, uint8_t *, - struct netmap_vp_adapter *); - -#define NM_BDG_MAXPORTS 254 /* up to 254 */ -#define NM_BDG_BROADCAST NM_BDG_MAXPORTS -#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) - -#define NM_NAME "vale" /* prefix for bridge port name */ - - -/* these are redefined in case of no VALE support */ -int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create); -void netmap_init_bridges(void); -int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func); - -#else /* !WITH_VALE */ -#define netmap_get_bdg_na(_1, _2, _3) 0 -#define netmap_init_bridges(_1) -#define netmap_bdg_ctl(_1, _2) EINVAL -#endif /* !WITH_VALE */ - -#ifdef WITH_PIPES -/* max number of pipes per device */ -#define NM_MAXPIPES 64 /* XXX how many? 
*/ -/* in case of no error, returns the actual number of pipes in nmr->nr_arg1 */ -int netmap_pipe_alloc(struct netmap_adapter *, struct nmreq *nmr); -void netmap_pipe_dealloc(struct netmap_adapter *); -int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create); -#else /* !WITH_PIPES */ -#define NM_MAXPIPES 0 -#define netmap_pipe_alloc(_1, _2) EOPNOTSUPP -#define netmap_pipe_dealloc(_1) -#define netmap_get_pipe_na(_1, _2, _3) 0 -#endif - -/* Various prototypes */ -int netmap_poll(struct cdev *dev, int events, struct thread *td); -int netmap_init(void); -void netmap_fini(void); -int netmap_get_memory(struct netmap_priv_d* p); -void netmap_dtor(void *data); -int netmap_dtor_locked(struct netmap_priv_d *priv); - -int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td); - -/* netmap_adapter creation/destruction */ -#define NM_IFPNAME(ifp) ((ifp) ? (ifp)->if_xname : "zombie") - -// #define NM_DEBUG_PUTGET 1 - -#ifdef NM_DEBUG_PUTGET - -#define NM_DBG(f) __##f - -void __netmap_adapter_get(struct netmap_adapter *na); - -#define netmap_adapter_get(na) \ - do { \ - struct netmap_adapter *__na = na; \ - D("getting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \ - __netmap_adapter_get(__na); \ - } while (0) - -int __netmap_adapter_put(struct netmap_adapter *na); - -#define netmap_adapter_put(na) \ - ({ \ - struct netmap_adapter *__na = na; \ - D("putting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \ - __netmap_adapter_put(__na); \ - }) - -#else /* !NM_DEBUG_PUTGET */ - -#define NM_DBG(f) f -void netmap_adapter_get(struct netmap_adapter *na); -int netmap_adapter_put(struct netmap_adapter *na); - -#endif /* !NM_DEBUG_PUTGET */ - - -/* - * module variables - */ -extern u_int netmap_buf_size; -#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove -extern int netmap_mitigate; // XXX not really used -extern int netmap_no_pendintr; -extern u_int netmap_total_buffers; // global allocator -extern char *netmap_buffer_base; // global allocator -extern int netmap_verbose; // XXX debugging -enum { /* verbose flags */ - NM_VERB_ON = 1, /* generic verbose */ - NM_VERB_HOST = 0x2, /* verbose host stack */ - NM_VERB_RXSYNC = 0x10, /* verbose on rxsync/txsync */ - NM_VERB_TXSYNC = 0x20, - NM_VERB_RXINTR = 0x100, /* verbose on rx/tx intr (driver) */ - NM_VERB_TXINTR = 0x200, - NM_VERB_NIC_RXSYNC = 0x1000, /* verbose on rx/tx intr (driver) */ - NM_VERB_NIC_TXSYNC = 0x2000, -}; - -extern int netmap_txsync_retry; -extern int netmap_generic_mit; -extern int netmap_generic_ringsize; -extern int netmap_generic_rings; - -/* - * NA returns a pointer to the struct netmap adapter from the ifp, - * WNA is used to write it. - */ -#ifndef WNA -#define WNA(_ifp) (_ifp)->if_pspare[0] -#endif -#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp)) - -/* - * Macros to determine if an interface is netmap capable or netmap enabled. - * See the magic field in struct netmap_adapter. - */ -#ifdef __FreeBSD__ -/* - * on FreeBSD just use if_capabilities and if_capenable. - */ -#define NETMAP_CAPABLE(ifp) (NA(ifp) && \ - (ifp)->if_capabilities & IFCAP_NETMAP ) - -#define NETMAP_SET_CAPABLE(ifp) \ - (ifp)->if_capabilities |= IFCAP_NETMAP - -#else /* linux */ - -/* - * on linux: - * we check if NA(ifp) is set and its first element has a related - * magic value. The capenable is within the struct netmap_adapter. 
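 * Concretely, NETMAP_SET_CAPABLE() stores NA(ifp) XOR-ed with NETMAP_MAGIC
 * in the magic field, so NETMAP_CAPABLE() can recompute the XOR and check
 * that the stored value matches the adapter pointer.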
- */ -#define NETMAP_MAGIC 0x52697a7a - -#define NETMAP_CAPABLE(ifp) (NA(ifp) && \ - ((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC ) - -#define NETMAP_SET_CAPABLE(ifp) \ - NA(ifp)->magic = ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC - -#endif /* linux */ - -#ifdef __FreeBSD__ - -/* Callback invoked by the dma machinery after a successful dmamap_load */ -static void netmap_dmamap_cb(__unused void *arg, - __unused bus_dma_segment_t * segs, __unused int nseg, __unused int error) -{ -} - -/* bus_dmamap_load wrapper: call aforementioned function if map != NULL. - * XXX can we do it without a callback ? - */ -static inline void -netmap_load_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf) -{ - if (map) - bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE, - netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT); -} - -/* update the map when a buffer changes. */ -static inline void -netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf) -{ - if (map) { - bus_dmamap_unload(tag, map); - bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE, - netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT); - } -} - -#else /* linux */ - -/* - * XXX How do we redefine these functions: - * - * on linux we need - * dma_map_single(&pdev->dev, virt_addr, len, direction) - * dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction - * The len can be implicit (on netmap it is NETMAP_BUF_SIZE) - * unfortunately the direction is not, so we need to change - * something to have a cross API - */ -#define netmap_load_map(_t, _m, _b) -#define netmap_reload_map(_t, _m, _b) -#if 0 - struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l]; - /* set time_stamp *before* dma to help avoid a possible race */ - buffer_info->time_stamp = jiffies; - buffer_info->mapped_as_page = false; - buffer_info->length = len; - //buffer_info->next_to_watch = l; - /* reload dma map */ - dma_unmap_single(&adapter->pdev->dev, buffer_info->dma, - NETMAP_BUF_SIZE, DMA_TO_DEVICE); - buffer_info->dma = dma_map_single(&adapter->pdev->dev, - addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE); - - if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { - D("dma mapping error"); - /* goto dma_error; See e1000_put_txbuf() */ - /* XXX reset */ - } - tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX - -#endif - -/* - * The bus_dmamap_sync() can be one of wmb() or rmb() depending on direction. - */ -#define bus_dmamap_sync(_a, _b, _c) - -#endif /* linux */ - - -/* - * functions to map NIC to KRING indexes (n2k) and vice versa (k2n) - */ -static inline int -netmap_idx_n2k(struct netmap_kring *kr, int idx) -{ - int n = kr->nkr_num_slots; - idx += kr->nkr_hwofs; - if (idx < 0) - return idx + n; - else if (idx < n) - return idx; - else - return idx - n; -} - - -static inline int -netmap_idx_k2n(struct netmap_kring *kr, int idx) -{ - int n = kr->nkr_num_slots; - idx -= kr->nkr_hwofs; - if (idx < 0) - return idx + n; - else if (idx < n) - return idx; - else - return idx - n; -} - - -/* Entries of the look-up table. */ -struct lut_entry { - void *vaddr; /* virtual address. */ - vm_paddr_t paddr; /* physical address. */ -}; - -struct netmap_obj_pool; -extern struct lut_entry *netmap_buffer_lut; -#define NMB_VA(i) (netmap_buffer_lut[i].vaddr) -#define NMB_PA(i) (netmap_buffer_lut[i].paddr) - -/* - * NMB return the virtual address of a buffer (buffer 0 on bad index) - * PNMB also fills the physical address - */ -static inline void * -NMB(struct netmap_slot *slot) -{ - uint32_t i = slot->buf_idx; - return (unlikely(i >= netmap_total_buffers)) ? 
NMB_VA(0) : NMB_VA(i); -} - -static inline void * -PNMB(struct netmap_slot *slot, uint64_t *pp) -{ - uint32_t i = slot->buf_idx; - void *ret = (i >= netmap_total_buffers) ? NMB_VA(0) : NMB_VA(i); - - *pp = (i >= netmap_total_buffers) ? NMB_PA(0) : NMB_PA(i); - return ret; -} - -/* Generic version of NMB, which uses device-specific memory. */ -static inline void * -BDG_NMB(struct netmap_adapter *na, struct netmap_slot *slot) -{ - struct lut_entry *lut = na->na_lut; - uint32_t i = slot->buf_idx; - return (unlikely(i >= na->na_lut_objtotal)) ? - lut[0].vaddr : lut[i].vaddr; -} - - - -void netmap_txsync_to_host(struct netmap_adapter *na); - - -/* - * Structure associated to each thread which registered an interface. - * - * The first 4 fields of this structure are written by NIOCREGIF and - * read by poll() and NIOC?XSYNC. - * - * There is low contention among writers (a correct user program - * should have none) and among writers and readers, so we use a - * single global lock to protect the structure initialization; - * since initialization involves the allocation of memory, - * we reuse the memory allocator lock. - * - * Read access to the structure is lock free. Readers must check that - * np_nifp is not NULL before using the other fields. - * If np_nifp is NULL initialization has not been performed, - * so they should return an error to userspace. - * - * The ref_done field is used to regulate access to the refcount in the - * memory allocator. The refcount must be incremented at most once for - * each open("/dev/netmap"). The increment is performed by the first - * function that calls netmap_get_memory() (currently called by - * mmap(), NIOCGINFO and NIOCREGIF). - * If the refcount is incremented, it is then decremented when the - * private structure is destroyed. - */ -struct netmap_priv_d { - struct netmap_if * volatile np_nifp; /* netmap if descriptor. */ - - struct netmap_adapter *np_na; - uint32_t np_flags; /* from the ioctl */ - u_int np_txqfirst, np_txqlast; /* range of tx rings to scan */ - u_int np_rxqfirst, np_rxqlast; /* range of rx rings to scan */ - uint16_t np_txpoll; /* XXX and also np_rxpoll ? */ - - struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */ - /* np_refcount is only used on FreeBSD */ - int np_refcount; /* use with NMG_LOCK held */ - - /* pointers to the selinfo to be used for selrecord. - * Either the local or the global one depending on the - * number of rings. - */ - NM_SELINFO_T *np_rxsi, *np_txsi; - struct thread *np_td; /* kqueue, just debugging */ -}; - - -/* - * generic netmap emulation for devices that do not have - * native netmap support. - */ -int generic_netmap_attach(struct ifnet *ifp); - -int netmap_catch_rx(struct netmap_adapter *na, int intercept); -void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);; -void netmap_catch_tx(struct netmap_generic_adapter *na, int enable); -int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr); -int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx); -void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq); - -//#define RATE_GENERIC /* Enables communication statistics for generic. */ -#ifdef RATE_GENERIC -void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi); -#else -#define generic_rate(txp, txs, txi, rxp, rxs, rxi) -#endif - -/* - * netmap_mitigation API. This is used by the generic adapter - * to reduce the number of interrupt requests/selwakeup - * to clients on incoming packets. 
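
NMB(), PNMB() and BDG_NMB() above all resolve a slot's buf_idx through a lookup table of (virtual, physical) addresses, falling back to buffer 0 on an out-of-range index so callers never receive an invalid pointer. A self-contained sketch of that translation; the pool size, fake physical addresses and names (sketch_lut, sketch_nmb) are made up for illustration:

/* sketch of the index -> (vaddr, paddr) lookup with fallback to buffer 0 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_BUFS   4
#define SKETCH_BUFSZ  2048

struct sketch_lut_entry {
	void	 *vaddr;	/* virtual address of the buffer */
	uint64_t  paddr;	/* physical address (faked here) */
};

static char sketch_pool[SKETCH_BUFS][SKETCH_BUFSZ];
static struct sketch_lut_entry sketch_lut[SKETCH_BUFS];

static void *
sketch_nmb(uint32_t idx, uint64_t *pa)
{
	uint32_t i = (idx >= SKETCH_BUFS) ? 0 : idx;	/* bad index -> buffer 0 */

	if (pa != NULL)
		*pa = sketch_lut[i].paddr;
	return sketch_lut[i].vaddr;
}

int
main(void)
{
	uint64_t pa;
	uint32_t i;

	for (i = 0; i < SKETCH_BUFS; i++) {
		sketch_lut[i].vaddr = sketch_pool[i];
		sketch_lut[i].paddr = 0x1000 + (uint64_t)i * SKETCH_BUFSZ;
	}
	sketch_nmb(2, &pa);
	printf("buf 2 paddr 0x%llx\n", (unsigned long long)pa);
	sketch_nmb(99, &pa);		/* out of range, falls back to buffer 0 */
	printf("bad index paddr 0x%llx\n", (unsigned long long)pa);
	return 0;
}
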
- */ -void netmap_mitigation_init(struct nm_generic_mit *mit, int idx, - struct netmap_adapter *na); -void netmap_mitigation_start(struct nm_generic_mit *mit); -void netmap_mitigation_restart(struct nm_generic_mit *mit); -int netmap_mitigation_active(struct nm_generic_mit *mit); -void netmap_mitigation_cleanup(struct nm_generic_mit *mit); - - - -/* Shared declarations for the VALE switch. */ - -/* - * Each transmit queue accumulates a batch of packets into - * a structure before forwarding. Packets to the same - * destination are put in a list using ft_next as a link field. - * ft_frags and ft_next are valid only on the first fragment. - */ -struct nm_bdg_fwd { /* forwarding entry for a bridge */ - void *ft_buf; /* netmap or indirect buffer */ - uint8_t ft_frags; /* how many fragments (only on 1st frag) */ - uint8_t _ft_port; /* dst port (unused) */ - uint16_t ft_flags; /* flags, e.g. indirect */ - uint16_t ft_len; /* src fragment len */ - uint16_t ft_next; /* next packet to same destination */ -}; - -/* struct 'virtio_net_hdr' from linux. */ -struct nm_vnet_hdr { -#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ -#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ - uint8_t flags; -#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ -#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ -#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ -#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ -#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ - uint8_t gso_type; - uint16_t hdr_len; - uint16_t gso_size; - uint16_t csum_start; - uint16_t csum_offset; -}; - -#define WORST_CASE_GSO_HEADER (14+40+60) /* IPv6 + TCP */ - -/* Private definitions for IPv4, IPv6, UDP and TCP headers. */ - -struct nm_iphdr { - uint8_t version_ihl; - uint8_t tos; - uint16_t tot_len; - uint16_t id; - uint16_t frag_off; - uint8_t ttl; - uint8_t protocol; - uint16_t check; - uint32_t saddr; - uint32_t daddr; - /*The options start here. */ -}; - -struct nm_tcphdr { - uint16_t source; - uint16_t dest; - uint32_t seq; - uint32_t ack_seq; - uint8_t doff; /* Data offset + Reserved */ - uint8_t flags; - uint16_t window; - uint16_t check; - uint16_t urg_ptr; -}; - -struct nm_udphdr { - uint16_t source; - uint16_t dest; - uint16_t len; - uint16_t check; -}; - -struct nm_ipv6hdr { - uint8_t priority_version; - uint8_t flow_lbl[3]; - - uint16_t payload_len; - uint8_t nexthdr; - uint8_t hop_limit; - - uint8_t saddr[16]; - uint8_t daddr[16]; -}; - -/* Type used to store a checksum (in host byte order) that hasn't been - * folded yet. - */ -#define rawsum_t uint32_t - -rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum); -uint16_t nm_csum_ipv4(struct nm_iphdr *iph); -void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, - size_t datalen, uint16_t *check); -void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, - size_t datalen, uint16_t *check); -uint16_t nm_csum_fold(rawsum_t cur_sum); - -void bdg_mismatch_datapath(struct netmap_vp_adapter *na, - struct netmap_vp_adapter *dst_na, - struct nm_bdg_fwd *ft_p, struct netmap_ring *ring, - u_int *j, u_int lim, u_int *howmany); - -#endif /* _NET_NETMAP_KERN_H_ */ diff --git a/netmap/sys/dev/netmap/netmap_mbq.c b/netmap/sys/dev/netmap/netmap_mbq.c deleted file mode 100644 index 2606b13..0000000 --- a/netmap/sys/dev/netmap/netmap_mbq.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved. 
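
The checksum helpers declared in the header above (nm_csum_raw, nm_csum_fold and friends) split the work into an unfolded 32-bit accumulation followed by a final fold to 16 bits. One common way to implement that two-step scheme is sketched below in plain user-space C; this is illustrative only, not the netmap implementation, and byte-order handling may differ:

/* sketch of an unfolded 16-bit one's-complement sum plus final fold */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t sketch_rawsum_t;

static sketch_rawsum_t
sketch_csum_raw(const uint8_t *data, size_t len, sketch_rawsum_t cur)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		cur += (uint32_t)(data[i] << 8) | data[i + 1];
	if (len & 1)
		cur += (uint32_t)data[len - 1] << 8;	/* pad the odd byte */
	return cur;
}

static uint16_t
sketch_csum_fold(sketch_rawsum_t cur)
{
	while (cur >> 16)
		cur = (cur & 0xffff) + (cur >> 16);	/* add back the carries */
	return (uint16_t)~cur;
}

int
main(void)
{
	uint8_t payload[] = { 0x45, 0x00, 0x00, 0x1c, 0xab, 0xcd };
	uint16_t csum = sketch_csum_fold(sketch_csum_raw(payload, sizeof(payload), 0));

	printf("checksum 0x%04x\n", csum);
	return 0;
}
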
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - */ - - -#ifdef linux -#include "bsd_glue.h" -#else /* __FreeBSD__ */ -#include -#include -#include -#include -#include -#endif /* __FreeBSD__ */ - -#include "netmap_mbq.h" - - -static inline void __mbq_init(struct mbq *q) -{ - q->head = q->tail = NULL; - q->count = 0; -} - - -void mbq_safe_init(struct mbq *q) -{ - mtx_init(&q->lock, "mbq", NULL, MTX_SPIN); - __mbq_init(q); -} - - -void mbq_init(struct mbq *q) -{ - __mbq_init(q); -} - - -static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m) -{ - m->m_nextpkt = NULL; - if (q->tail) { - q->tail->m_nextpkt = m; - q->tail = m; - } else { - q->head = q->tail = m; - } - q->count++; -} - - -void mbq_safe_enqueue(struct mbq *q, struct mbuf *m) -{ - mtx_lock(&q->lock); - __mbq_enqueue(q, m); - mtx_unlock(&q->lock); -} - - -void mbq_enqueue(struct mbq *q, struct mbuf *m) -{ - __mbq_enqueue(q, m); -} - - -static inline struct mbuf *__mbq_dequeue(struct mbq *q) -{ - struct mbuf *ret = NULL; - - if (q->head) { - ret = q->head; - q->head = ret->m_nextpkt; - if (q->head == NULL) { - q->tail = NULL; - } - q->count--; - ret->m_nextpkt = NULL; - } - - return ret; -} - - -struct mbuf *mbq_safe_dequeue(struct mbq *q) -{ - struct mbuf *ret; - - mtx_lock(&q->lock); - ret = __mbq_dequeue(q); - mtx_unlock(&q->lock); - - return ret; -} - - -struct mbuf *mbq_dequeue(struct mbq *q) -{ - return __mbq_dequeue(q); -} - - -/* XXX seems pointless to have a generic purge */ -static void __mbq_purge(struct mbq *q, int safe) -{ - struct mbuf *m; - - for (;;) { - m = safe ? mbq_safe_dequeue(q) : mbq_dequeue(q); - if (m) { - m_freem(m); - } else { - break; - } - } -} - - -void mbq_purge(struct mbq *q) -{ - __mbq_purge(q, 0); -} - - -void mbq_safe_purge(struct mbq *q) -{ - __mbq_purge(q, 1); -} - - -void mbq_safe_destroy(struct mbq *q) -{ - mtx_destroy(&q->lock); -} - - -void mbq_destroy(struct mbq *q) -{ -} diff --git a/netmap/sys/dev/netmap/netmap_mbq.h b/netmap/sys/dev/netmap/netmap_mbq.h deleted file mode 100644 index d273d8a..0000000 --- a/netmap/sys/dev/netmap/netmap_mbq.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved. 
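
The mbq code above keeps packets in a singly-linked FIFO described by head/tail pointers and a count, with "safe" wrappers that simply take the queue lock around the same unlocked primitives. A self-contained user-space sketch of that pattern, using malloc'd nodes and a pthread mutex in place of mbufs and the kernel spinlock (all names here are hypothetical; build with cc -pthread):

/* sketch of a head/tail FIFO with an optional lock, mirroring the mbq idea */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct sketch_pkt {
	struct sketch_pkt *next;
	int payload;
};

struct sketch_q {
	struct sketch_pkt *head, *tail;
	int count;
	pthread_mutex_t lock;
};

static void
sq_enqueue(struct sketch_q *q, struct sketch_pkt *p)
{
	p->next = NULL;
	if (q->tail)
		q->tail->next = p;
	else
		q->head = p;
	q->tail = p;
	q->count++;
}

static struct sketch_pkt *
sq_dequeue(struct sketch_q *q)
{
	struct sketch_pkt *p = q->head;

	if (p) {
		q->head = p->next;
		if (q->head == NULL)
			q->tail = NULL;
		q->count--;
		p->next = NULL;
	}
	return p;
}

/* "safe" variant: the same operation, performed under the queue lock */
static void
sq_safe_enqueue(struct sketch_q *q, struct sketch_pkt *p)
{
	pthread_mutex_lock(&q->lock);
	sq_enqueue(q, p);
	pthread_mutex_unlock(&q->lock);
}

int
main(void)
{
	static struct sketch_q q = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct sketch_pkt *p = calloc(1, sizeof(*p));

	p->payload = 42;
	sq_safe_enqueue(&q, p);
	printf("queued %d packet(s), first payload %d\n",
	    q.count, sq_dequeue(&q)->payload);
	free(p);
	return 0;
}
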
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - */ - - -#ifndef __NETMAP_MBQ_H__ -#define __NETMAP_MBQ_H__ - -/* - * These function implement an mbuf tailq with an optional lock. - * The base functions act ONLY ON THE QUEUE, whereas the "safe" - * variants (mbq_safe_*) also handle the lock. - */ - -/* XXX probably rely on a previous definition of SPINLOCK_T */ -#ifdef linux -#define SPINLOCK_T safe_spinlock_t -#else -#define SPINLOCK_T struct mtx -#endif - -/* A FIFO queue of mbufs with an optional lock. */ -struct mbq { - struct mbuf *head; - struct mbuf *tail; - int count; - SPINLOCK_T lock; -}; - -/* XXX "destroy" does not match "init" as a name. - * We should also clarify whether init can be used while - * holding a lock, and whether mbq_safe_destroy() is a NOP. - */ -void mbq_init(struct mbq *q); -void mbq_destroy(struct mbq *q); -void mbq_enqueue(struct mbq *q, struct mbuf *m); -struct mbuf *mbq_dequeue(struct mbq *q); -void mbq_purge(struct mbq *q); - -/* XXX missing mbq_lock() and mbq_unlock */ - -void mbq_safe_init(struct mbq *q); -void mbq_safe_destroy(struct mbq *q); -void mbq_safe_enqueue(struct mbq *q, struct mbuf *m); -struct mbuf *mbq_safe_dequeue(struct mbq *q); -void mbq_safe_purge(struct mbq *q); - -static inline unsigned int mbq_len(struct mbq *q) -{ - return q->count; -} - -#endif /* __NETMAP_MBQ_H_ */ diff --git a/netmap/sys/dev/netmap/netmap_mem2.c b/netmap/sys/dev/netmap/netmap_mem2.c deleted file mode 100644 index 7eca4ca..0000000 --- a/netmap/sys/dev/netmap/netmap_mem2.c +++ /dev/null @@ -1,1377 +0,0 @@ -/* - * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifdef linux -#include "bsd_glue.h" -#endif /* linux */ - -#ifdef __APPLE__ -#include "osx_glue.h" -#endif /* __APPLE__ */ - -#ifdef __FreeBSD__ -#include /* prerequisite */ -__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 241723 2012-10-19 09:41:45Z glebius $"); - -#include -#include -#include -#include /* vtophys */ -#include /* vtophys */ -#include /* sockaddrs */ -#include -#include -#include -#include -#include -#include /* bus_dmamap_* */ - -#endif /* __FreeBSD__ */ - -#include -#include -#include "netmap_mem2.h" - -#ifdef linux -#define NMA_LOCK_INIT(n) sema_init(&(n)->nm_mtx, 1) -#define NMA_LOCK_DESTROY(n) -#define NMA_LOCK(n) down(&(n)->nm_mtx) -#define NMA_UNLOCK(n) up(&(n)->nm_mtx) -#else /* !linux */ -#define NMA_LOCK_INIT(n) mtx_init(&(n)->nm_mtx, "netmap memory allocator lock", NULL, MTX_DEF) -#define NMA_LOCK_DESTROY(n) mtx_destroy(&(n)->nm_mtx) -#define NMA_LOCK(n) mtx_lock(&(n)->nm_mtx) -#define NMA_UNLOCK(n) mtx_unlock(&(n)->nm_mtx) -#endif /* linux */ - - -struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = { - [NETMAP_IF_POOL] = { - .size = 1024, - .num = 100, - }, - [NETMAP_RING_POOL] = { - .size = 9*PAGE_SIZE, - .num = 200, - }, - [NETMAP_BUF_POOL] = { - .size = 2048, - .num = NETMAP_BUF_MAX_NUM, - }, -}; - -struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = { - [NETMAP_IF_POOL] = { - .size = 1024, - .num = 1, - }, - [NETMAP_RING_POOL] = { - .size = 5*PAGE_SIZE, - .num = 4, - }, - [NETMAP_BUF_POOL] = { - .size = 2048, - .num = 4098, - }, -}; - - -/* - * nm_mem is the memory allocator used for all physical interfaces - * running in netmap mode. - * Virtual (VALE) ports will have each its own allocator. - */ -static int netmap_mem_global_config(struct netmap_mem_d *nmd); -static int netmap_mem_global_finalize(struct netmap_mem_d *nmd); -static void netmap_mem_global_deref(struct netmap_mem_d *nmd); -struct netmap_mem_d nm_mem = { /* Our memory allocator. */ - .pools = { - [NETMAP_IF_POOL] = { - .name = "netmap_if", - .objminsize = sizeof(struct netmap_if), - .objmaxsize = 4096, - .nummin = 10, /* don't be stingy */ - .nummax = 10000, /* XXX very large */ - }, - [NETMAP_RING_POOL] = { - .name = "netmap_ring", - .objminsize = sizeof(struct netmap_ring), - .objmaxsize = 32*PAGE_SIZE, - .nummin = 2, - .nummax = 1024, - }, - [NETMAP_BUF_POOL] = { - .name = "netmap_buf", - .objminsize = 64, - .objmaxsize = 65536, - .nummin = 4, - .nummax = 1000000, /* one million! 
*/ - }, - }, - .config = netmap_mem_global_config, - .finalize = netmap_mem_global_finalize, - .deref = netmap_mem_global_deref, - - .nm_id = 1, - - .prev = &nm_mem, - .next = &nm_mem, -}; - - -struct netmap_mem_d *netmap_last_mem_d = &nm_mem; - -// XXX logically belongs to nm_mem -struct lut_entry *netmap_buffer_lut; /* exported */ - -/* blueprint for the private memory allocators */ -static int netmap_mem_private_config(struct netmap_mem_d *nmd); -static int netmap_mem_private_finalize(struct netmap_mem_d *nmd); -static void netmap_mem_private_deref(struct netmap_mem_d *nmd); -const struct netmap_mem_d nm_blueprint = { - .pools = { - [NETMAP_IF_POOL] = { - .name = "%s_if", - .objminsize = sizeof(struct netmap_if), - .objmaxsize = 4096, - .nummin = 1, - .nummax = 100, - }, - [NETMAP_RING_POOL] = { - .name = "%s_ring", - .objminsize = sizeof(struct netmap_ring), - .objmaxsize = 32*PAGE_SIZE, - .nummin = 2, - .nummax = 1024, - }, - [NETMAP_BUF_POOL] = { - .name = "%s_buf", - .objminsize = 64, - .objmaxsize = 65536, - .nummin = 4, - .nummax = 1000000, /* one million! */ - }, - }, - .config = netmap_mem_private_config, - .finalize = netmap_mem_private_finalize, - .deref = netmap_mem_private_deref, - - .flags = NETMAP_MEM_PRIVATE, -}; - -/* memory allocator related sysctls */ - -#define STRINGIFY(x) #x - - -#define DECLARE_SYSCTLS(id, name) \ - SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \ - CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \ - SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \ - CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ - SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \ - CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \ - SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \ - CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \ - SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \ - CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \ - "Default size of private netmap " STRINGIFY(name) "s"); \ - SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \ - CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \ - "Default number of private netmap " STRINGIFY(name) "s") - -SYSCTL_DECL(_dev_netmap); -DECLARE_SYSCTLS(NETMAP_IF_POOL, if); -DECLARE_SYSCTLS(NETMAP_RING_POOL, ring); -DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf); - -static int -nm_mem_assign_id(struct netmap_mem_d *nmd) -{ - nm_memid_t id; - struct netmap_mem_d *scan = netmap_last_mem_d; - int error = ENOMEM; - - NMA_LOCK(&nm_mem); - - do { - /* we rely on unsigned wrap around */ - id = scan->nm_id + 1; - if (id == 0) /* reserve 0 as error value */ - id = 1; - scan = scan->next; - if (id != scan->nm_id) { - nmd->nm_id = id; - nmd->prev = scan->prev; - nmd->next = scan; - scan->prev->next = nmd; - scan->prev = nmd; - netmap_last_mem_d = nmd; - error = 0; - break; - } - } while (scan != netmap_last_mem_d); - - NMA_UNLOCK(&nm_mem); - return error; -} - -static void -nm_mem_release_id(struct netmap_mem_d *nmd) -{ - NMA_LOCK(&nm_mem); - - nmd->prev->next = nmd->next; - nmd->next->prev = nmd->prev; - - if (netmap_last_mem_d == nmd) - netmap_last_mem_d = nmd->prev; - - nmd->prev = nmd->next = NULL; - - NMA_UNLOCK(&nm_mem); -} - - -/* - * First, find the allocator that contains the requested offset, - * then locate the cluster through a lookup table. 
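
netmap_mem_ofstophys(), which follows, resolves an offset in the exported region by walking the three pools in their mapping order, subtracting each pool's total size until the offset falls inside one, and then consulting that pool's lookup table. The walk can be sketched in user space as below; pool sizes are made up, and the per-object physical lookup is reduced to an index computation:

/* sketch of the offset walk over sequentially mapped pools */
#include <stdint.h>
#include <stdio.h>

#define SK_POOLS 3

struct sk_pool {
	const char *name;
	uint64_t memtotal;	/* bytes this pool contributes to the region */
	uint64_t objsize;	/* size of each object in the pool */
};

static const struct sk_pool sk_pools[SK_POOLS] = {
	{ "if",   4096,      1024 },
	{ "ring", 8 * 4096,  4096 },
	{ "buf",  64 * 2048, 2048 },
};

/* return the pool index, plus object index and offset within the object;
 * a real allocator would then read the object's paddr from the pool's lut */
static int
sk_ofs_to_obj(uint64_t offset, uint64_t *obj, uint64_t *ofs_in_obj)
{
	int i;

	for (i = 0; i < SK_POOLS; offset -= sk_pools[i].memtotal, i++) {
		if (offset >= sk_pools[i].memtotal)
			continue;	/* not in this pool: skip its whole size */
		*obj = offset / sk_pools[i].objsize;
		*ofs_in_obj = offset % sk_pools[i].objsize;
		return i;
	}
	return -1;	/* offset past the end of the region */
}

int
main(void)
{
	uint64_t obj, ofs;
	int pool = sk_ofs_to_obj(4096 + 5000, &obj, &ofs);

	if (pool >= 0)
		printf("pool %s, object %llu, offset %llu\n", sk_pools[pool].name,
		    (unsigned long long)obj, (unsigned long long)ofs);
	return 0;
}
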
- */ -vm_paddr_t -netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) -{ - int i; - vm_ooffset_t o = offset; - vm_paddr_t pa; - struct netmap_obj_pool *p; - - NMA_LOCK(nmd); - p = nmd->pools; - - for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) { - if (offset >= p[i].memtotal) - continue; - // now lookup the cluster's address - pa = p[i].lut[offset / p[i]._objsize].paddr + - offset % p[i]._objsize; - NMA_UNLOCK(nmd); - return pa; - } - /* this is only in case of errors */ - D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o, - p[NETMAP_IF_POOL].memtotal, - p[NETMAP_IF_POOL].memtotal - + p[NETMAP_RING_POOL].memtotal, - p[NETMAP_IF_POOL].memtotal - + p[NETMAP_RING_POOL].memtotal - + p[NETMAP_BUF_POOL].memtotal); - NMA_UNLOCK(nmd); - return 0; // XXX bad address -} - -int -netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags, - nm_memid_t *id) -{ - int error = 0; - NMA_LOCK(nmd); - error = nmd->config(nmd); - if (error) - goto out; - if (nmd->flags & NETMAP_MEM_FINALIZED) { - *size = nmd->nm_totalsize; - } else { - int i; - *size = 0; - for (i = 0; i < NETMAP_POOLS_NR; i++) { - struct netmap_obj_pool *p = nmd->pools + i; - *size += (p->_numclusters * p->_clustsize); - } - } - *memflags = nmd->flags; - *id = nmd->nm_id; -out: - NMA_UNLOCK(nmd); - return error; -} - -/* - * we store objects by kernel address, need to find the offset - * within the pool to export the value to userspace. - * Algorithm: scan until we find the cluster, then add the - * actual offset in the cluster - */ -static ssize_t -netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) -{ - int i, k = p->_clustentries, n = p->objtotal; - ssize_t ofs = 0; - - for (i = 0; i < n; i += k, ofs += p->_clustsize) { - const char *base = p->lut[i].vaddr; - ssize_t relofs = (const char *) vaddr - base; - - if (relofs < 0 || relofs >= p->_clustsize) - continue; - - ofs = ofs + relofs; - ND("%s: return offset %d (cluster %d) for pointer %p", - p->name, ofs, i, vaddr); - return ofs; - } - D("address %p is not contained inside any cluster (%s)", - vaddr, p->name); - return 0; /* An error occurred */ -} - -/* Helper functions which convert virtual addresses to offsets */ -#define netmap_if_offset(n, v) \ - netmap_obj_offset(&(n)->pools[NETMAP_IF_POOL], (v)) - -#define netmap_ring_offset(n, v) \ - ((n)->pools[NETMAP_IF_POOL].memtotal + \ - netmap_obj_offset(&(n)->pools[NETMAP_RING_POOL], (v))) - -#define netmap_buf_offset(n, v) \ - ((n)->pools[NETMAP_IF_POOL].memtotal + \ - (n)->pools[NETMAP_RING_POOL].memtotal + \ - netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v))) - - -ssize_t -netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *addr) -{ - ssize_t v; - NMA_LOCK(nmd); - v = netmap_if_offset(nmd, addr); - NMA_UNLOCK(nmd); - return v; -} - -/* - * report the index, and use start position as a hint, - * otherwise buffer allocation becomes terribly expensive. 
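
netmap_obj_malloc(), which follows, tracks object state in an array of 32-bit words, one bit per object (1 = free), scanning for a set bit on allocation and setting it back on free, with a double-free check. A stripped-down, stand-alone sketch of that bitmap bookkeeping, without the size checks and start-position hint of the real code:

/* sketch of bitmap-based object allocation and free */
#include <stdint.h>
#include <stdio.h>

#define SK_OBJS  40
#define SK_WORDS ((SK_OBJS + 31) / 32)

static uint32_t sk_bitmap[SK_WORDS];
static uint32_t sk_free = SK_OBJS;

static void
sk_pool_init(void)
{
	uint32_t i;

	for (i = 0; i < SK_OBJS; i++)
		sk_bitmap[i / 32] |= 1U << (i % 32);	/* all objects start free */
}

/* returns an object index, or -1 when the pool is exhausted */
static int
sk_alloc(void)
{
	uint32_t i, j, cur, mask;

	for (i = 0; i < SK_WORDS; i++) {
		cur = sk_bitmap[i];
		if (cur == 0)
			continue;	/* this word is fully allocated */
		for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1)
			;		/* locate the first free bit */
		sk_bitmap[i] &= ~mask;	/* mark the object as in use */
		sk_free--;
		return (int)(i * 32 + j);
	}
	return -1;
}

/* returns nonzero on out-of-range index or double free */
static int
sk_free_obj(uint32_t idx)
{
	uint32_t *w = &sk_bitmap[idx / 32], mask = 1U << (idx % 32);

	if (idx >= SK_OBJS || (*w & mask))
		return 1;
	*w |= mask;
	sk_free++;
	return 0;
}

int
main(void)
{
	int a, b;

	sk_pool_init();
	a = sk_alloc();
	b = sk_alloc();
	printf("got %d and %d, %u still free\n", a, b, sk_free);
	sk_free_obj((uint32_t)a);
	printf("double free detected: %d\n", sk_free_obj((uint32_t)a));
	return 0;
}
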
- */ -static void * -netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_t *index) -{ - uint32_t i = 0; /* index in the bitmap */ - uint32_t mask, j; /* slot counter */ - void *vaddr = NULL; - - if (len > p->_objsize) { - D("%s request size %d too large", p->name, len); - // XXX cannot reduce the size - return NULL; - } - - if (p->objfree == 0) { - D("no more %s objects", p->name); - return NULL; - } - if (start) - i = *start; - - /* termination is guaranteed by p->free, but better check bounds on i */ - while (vaddr == NULL && i < p->bitmap_slots) { - uint32_t cur = p->bitmap[i]; - if (cur == 0) { /* bitmask is fully used */ - i++; - continue; - } - /* locate a slot */ - for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1) - ; - - p->bitmap[i] &= ~mask; /* mark object as in use */ - p->objfree--; - - vaddr = p->lut[i * 32 + j].vaddr; - if (index) - *index = i * 32 + j; - } - ND("%s allocator: allocated object @ [%d][%d]: vaddr %p", i, j, vaddr); - - if (start) - *start = i; - return vaddr; -} - - -/* - * free by index, not by address. - * XXX should we also cleanup the content ? - */ -static int -netmap_obj_free(struct netmap_obj_pool *p, uint32_t j) -{ - uint32_t *ptr, mask; - - if (j >= p->objtotal) { - D("invalid index %u, max %u", j, p->objtotal); - return 1; - } - ptr = &p->bitmap[j / 32]; - mask = (1 << (j % 32)); - if (*ptr & mask) { - D("ouch, double free on buffer %d", j); - return 1; - } else { - *ptr |= mask; - p->objfree++; - return 0; - } -} - -/* - * free by address. This is slow but is only used for a few - * objects (rings, nifp) - */ -static void -netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) -{ - u_int i, j, n = p->numclusters; - - for (i = 0, j = 0; i < n; i++, j += p->_clustentries) { - void *base = p->lut[i * p->_clustentries].vaddr; - ssize_t relofs = (ssize_t) vaddr - (ssize_t) base; - - /* Given address, is out of the scope of the current cluster.*/ - if (vaddr < base || relofs >= p->_clustsize) - continue; - - j = j + relofs / p->_objsize; - /* KASSERT(j != 0, ("Cannot free object 0")); */ - netmap_obj_free(p, j); - return; - } - D("address %p is not contained inside any cluster (%s)", - vaddr, p->name); -} - -#define netmap_if_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL) -#define netmap_if_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v)) -#define netmap_ring_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_RING_POOL], len, NULL, NULL) -#define netmap_ring_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_RING_POOL], (v)) -#define netmap_buf_malloc(n, _pos, _index) \ - netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], NETMAP_BDG_BUF_SIZE(n), _pos, _index) - - -#if 0 // XXX unused -/* Return the index associated to the given packet buffer */ -#define netmap_buf_index(n, v) \ - (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n)) -#endif - -/* - * allocate extra buffers in a linked list. - * returns the actual number. 
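
netmap_extra_alloc(), which follows, hands userspace a list of spare buffers by threading the list through the buffers themselves: the first 32-bit word of each newly allocated buffer stores the index of the previous head, and the final head index is returned (0 meaning an empty list). A stand-alone sketch of that intrusive list, with a trivial sequential allocator standing in for the bitmap allocator:

/* sketch of the extra-buffers list threaded through the buffers */
#include <stdint.h>
#include <stdio.h>

#define SK_NBUF   8
#define SK_BUFSZ  64

/* each "buffer" is a block of 32-bit words; word 0 holds the link */
static uint32_t sk_bufs[SK_NBUF][SK_BUFSZ / sizeof(uint32_t)];
static uint32_t sk_next_free = 2;	/* indices 0 and 1 reserved, as in netmap */

/* stand-in for the real bitmap allocator: hand out indices sequentially */
static uint32_t *
sk_buf_malloc(uint32_t *index)
{
	if (sk_next_free >= SK_NBUF)
		return NULL;
	*index = sk_next_free++;
	return sk_bufs[*index];
}

/* allocate up to n buffers, chaining them through their own first word;
 * returns how many were actually obtained, with the list head in *head */
static uint32_t
sk_extra_alloc(uint32_t *head, uint32_t n)
{
	uint32_t i;

	*head = 0;			/* 0 means "empty list" */
	for (i = 0; i < n; i++) {
		uint32_t cur = *head;	/* remember the current head */
		uint32_t *p = sk_buf_malloc(head);

		if (p == NULL) {
			*head = cur;	/* roll back to the last good head */
			break;
		}
		*p = cur;		/* new buffer points to the previous head */
	}
	return i;
}

int
main(void)
{
	uint32_t head, got = sk_extra_alloc(&head, 10);

	printf("allocated %u buffers, list head is %u\n", got, head);
	for (; head != 0; head = sk_bufs[head][0])
		printf("  buffer %u\n", head);
	return 0;
}
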
- */ -uint32_t -netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n) -{ - struct netmap_mem_d *nmd = na->nm_mem; - uint32_t i, pos = 0; /* opaque, scan position in the bitmap */ - - NMA_LOCK(nmd); - - *head = 0; /* default, 'null' index ie empty list */ - for (i = 0 ; i < n; i++) { - uint32_t cur = *head; /* save current head */ - uint32_t *p = netmap_buf_malloc(nmd, &pos, head); - if (p == NULL) { - D("no more buffers after %d of %d", i, n); - *head = cur; /* restore */ - break; - } - RD(5, "allocate buffer %d -> %d", *head, cur); - *p = cur; /* link to previous head */ - } - - NMA_UNLOCK(nmd); - - return i; -} - -static void -netmap_extra_free(struct netmap_adapter *na, uint32_t head) -{ - struct lut_entry *lut = na->na_lut; - struct netmap_mem_d *nmd = na->nm_mem; - struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; - uint32_t i, cur, *buf; - - D("freeing the extra list"); - for (i = 0; head >=2 && head < p->objtotal; i++) { - cur = head; - buf = lut[head].vaddr; - head = *buf; - *buf = 0; - if (netmap_obj_free(p, cur)) - break; - } - if (head != 0) - D("breaking with head %d", head); - D("freed %d buffers", i); -} - - -/* Return nonzero on error */ -static int -netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) -{ - struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; - u_int i = 0; /* slot counter */ - uint32_t pos = 0; /* slot in p->bitmap */ - uint32_t index = 0; /* buffer index */ - - for (i = 0; i < n; i++) { - void *vaddr = netmap_buf_malloc(nmd, &pos, &index); - if (vaddr == NULL) { - D("no more buffers after %d of %d", i, n); - goto cleanup; - } - slot[i].buf_idx = index; - slot[i].len = p->_objsize; - slot[i].flags = 0; - } - - ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos); - return (0); - -cleanup: - while (i > 0) { - i--; - netmap_obj_free(p, slot[i].buf_idx); - } - bzero(slot, n * sizeof(slot[0])); - return (ENOMEM); -} - -static void -netmap_mem_set_ring(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n, uint32_t index) -{ - struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; - u_int i; - - for (i = 0; i < n; i++) { - slot[i].buf_idx = index; - slot[i].len = p->_objsize; - slot[i].flags = 0; - } -} - - -static void -netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i) -{ - struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; - - if (i < 2 || i >= p->objtotal) { - D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal); - return; - } - netmap_obj_free(p, i); -} - - -static void -netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) -{ - u_int i; - - for (i = 0; i < n; i++) { - if (slot[i].buf_idx > 2) - netmap_free_buf(nmd, slot[i].buf_idx); - } -} - -static void -netmap_reset_obj_allocator(struct netmap_obj_pool *p) -{ - - if (p == NULL) - return; - if (p->bitmap) - free(p->bitmap, M_NETMAP); - p->bitmap = NULL; - if (p->lut) { - u_int i; - size_t sz = p->_clustsize; - - for (i = 0; i < p->objtotal; i += p->_clustentries) { - if (p->lut[i].vaddr) - contigfree(p->lut[i].vaddr, sz, M_NETMAP); - } - bzero(p->lut, sizeof(struct lut_entry) * p->objtotal); -#ifdef linux - vfree(p->lut); -#else - free(p->lut, M_NETMAP); -#endif - } - p->lut = NULL; - p->objtotal = 0; - p->memtotal = 0; - p->numclusters = 0; - p->objfree = 0; -} - -/* - * Free all resources related to an allocator. 
- */ -static void -netmap_destroy_obj_allocator(struct netmap_obj_pool *p) -{ - if (p == NULL) - return; - netmap_reset_obj_allocator(p); -} - -/* - * We receive a request for objtotal objects, of size objsize each. - * Internally we may round up both numbers, as we allocate objects - * in small clusters multiple of the page size. - * We need to keep track of objtotal and clustentries, - * as they are needed when freeing memory. - * - * XXX note -- userspace needs the buffers to be contiguous, - * so we cannot afford gaps at the end of a cluster. - */ - - -/* call with NMA_LOCK held */ -static int -netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int objsize) -{ - int i; - u_int clustsize; /* the cluster size, multiple of page size */ - u_int clustentries; /* how many objects per entry */ - - /* we store the current request, so we can - * detect configuration changes later */ - p->r_objtotal = objtotal; - p->r_objsize = objsize; - -#define MAX_CLUSTSIZE (1<<17) -#define LINE_ROUND NM_CACHE_ALIGN // 64 - if (objsize >= MAX_CLUSTSIZE) { - /* we could do it but there is no point */ - D("unsupported allocation for %d bytes", objsize); - return EINVAL; - } - /* make sure objsize is a multiple of LINE_ROUND */ - i = (objsize & (LINE_ROUND - 1)); - if (i) { - D("XXX aligning object by %d bytes", LINE_ROUND - i); - objsize += LINE_ROUND - i; - } - if (objsize < p->objminsize || objsize > p->objmaxsize) { - D("requested objsize %d out of range [%d, %d]", - objsize, p->objminsize, p->objmaxsize); - return EINVAL; - } - if (objtotal < p->nummin || objtotal > p->nummax) { - D("requested objtotal %d out of range [%d, %d]", - objtotal, p->nummin, p->nummax); - return EINVAL; - } - /* - * Compute number of objects using a brute-force approach: - * given a max cluster size, - * we try to fill it with objects keeping track of the - * wasted space to the next page boundary. 
- */ - for (clustentries = 0, i = 1;; i++) { - u_int delta, used = i * objsize; - if (used > MAX_CLUSTSIZE) - break; - delta = used % PAGE_SIZE; - if (delta == 0) { // exact solution - clustentries = i; - break; - } - if (delta > ( (clustentries*objsize) % PAGE_SIZE) ) - clustentries = i; - } - // D("XXX --- ouch, delta %d (bad for buffers)", delta); - /* compute clustsize and round to the next page */ - clustsize = clustentries * objsize; - i = (clustsize & (PAGE_SIZE - 1)); - if (i) - clustsize += PAGE_SIZE - i; - if (netmap_verbose) - D("objsize %d clustsize %d objects %d", - objsize, clustsize, clustentries); - - /* - * The number of clusters is n = ceil(objtotal/clustentries) - * objtotal' = n * clustentries - */ - p->_clustentries = clustentries; - p->_clustsize = clustsize; - p->_numclusters = (objtotal + clustentries - 1) / clustentries; - - /* actual values (may be larger than requested) */ - p->_objsize = objsize; - p->_objtotal = p->_numclusters * clustentries; - - return 0; -} - - -/* call with NMA_LOCK held */ -static int -netmap_finalize_obj_allocator(struct netmap_obj_pool *p) -{ - int i; /* must be signed */ - size_t n; - - /* optimistically assume we have enough memory */ - p->numclusters = p->_numclusters; - p->objtotal = p->_objtotal; - - n = sizeof(struct lut_entry) * p->objtotal; -#ifdef linux - p->lut = vmalloc(n); -#else - p->lut = malloc(n, M_NETMAP, M_NOWAIT | M_ZERO); -#endif - if (p->lut == NULL) { - D("Unable to create lookup table (%d bytes) for '%s'", (int)n, p->name); - goto clean; - } - - /* Allocate the bitmap */ - n = (p->objtotal + 31) / 32; - p->bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP, M_NOWAIT | M_ZERO); - if (p->bitmap == NULL) { - D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, - p->name); - goto clean; - } - p->bitmap_slots = n; - - /* - * Allocate clusters, init pointers and bitmap - */ - - n = p->_clustsize; - for (i = 0; i < (int)p->objtotal;) { - int lim = i + p->_clustentries; - char *clust; - - clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO, - (size_t)0, -1UL, PAGE_SIZE, 0); - if (clust == NULL) { - /* - * If we get here, there is a severe memory shortage, - * so halve the allocated memory to reclaim some. 
- */ - D("Unable to create cluster at %d for '%s' allocator", - i, p->name); - if (i < 2) /* nothing to halve */ - goto out; - lim = i / 2; - for (i--; i >= lim; i--) { - p->bitmap[ (i>>5) ] &= ~( 1 << (i & 31) ); - if (i % p->_clustentries == 0 && p->lut[i].vaddr) - contigfree(p->lut[i].vaddr, - n, M_NETMAP); - } - out: - p->objtotal = i; - /* we may have stopped in the middle of a cluster */ - p->numclusters = (i + p->_clustentries - 1) / p->_clustentries; - break; - } - for (; i < lim; i++, clust += p->_objsize) { - p->bitmap[ (i>>5) ] |= ( 1 << (i & 31) ); - p->lut[i].vaddr = clust; - p->lut[i].paddr = vtophys(clust); - } - } - p->objfree = p->objtotal; - p->memtotal = p->numclusters * p->_clustsize; - if (p->objfree == 0) - goto clean; - if (netmap_verbose) - D("Pre-allocated %d clusters (%d/%dKB) for '%s'", - p->numclusters, p->_clustsize >> 10, - p->memtotal >> 10, p->name); - - return 0; - -clean: - netmap_reset_obj_allocator(p); - return ENOMEM; -} - -/* call with lock held */ -static int -netmap_memory_config_changed(struct netmap_mem_d *nmd) -{ - int i; - - for (i = 0; i < NETMAP_POOLS_NR; i++) { - if (nmd->pools[i].r_objsize != netmap_params[i].size || - nmd->pools[i].r_objtotal != netmap_params[i].num) - return 1; - } - return 0; -} - -static void -netmap_mem_reset_all(struct netmap_mem_d *nmd) -{ - int i; - - if (netmap_verbose) - D("resetting %p", nmd); - for (i = 0; i < NETMAP_POOLS_NR; i++) { - netmap_reset_obj_allocator(&nmd->pools[i]); - } - nmd->flags &= ~NETMAP_MEM_FINALIZED; -} - -static int -netmap_mem_finalize_all(struct netmap_mem_d *nmd) -{ - int i; - if (nmd->flags & NETMAP_MEM_FINALIZED) - return 0; - nmd->lasterr = 0; - nmd->nm_totalsize = 0; - for (i = 0; i < NETMAP_POOLS_NR; i++) { - nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]); - if (nmd->lasterr) - goto error; - nmd->nm_totalsize += nmd->pools[i].memtotal; - } - /* buffers 0 and 1 are reserved */ - nmd->pools[NETMAP_BUF_POOL].objfree -= 2; - nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3; - nmd->flags |= NETMAP_MEM_FINALIZED; - - if (netmap_verbose) - D("interfaces %d KB, rings %d KB, buffers %d MB", - nmd->pools[NETMAP_IF_POOL].memtotal >> 10, - nmd->pools[NETMAP_RING_POOL].memtotal >> 10, - nmd->pools[NETMAP_BUF_POOL].memtotal >> 20); - - if (netmap_verbose) - D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree); - - - return 0; -error: - netmap_mem_reset_all(nmd); - return nmd->lasterr; -} - - - -void -netmap_mem_private_delete(struct netmap_mem_d *nmd) -{ - if (nmd == NULL) - return; - if (netmap_verbose) - D("deleting %p", nmd); - if (nmd->refcount > 0) - D("bug: deleting mem allocator with refcount=%d!", nmd->refcount); - nm_mem_release_id(nmd); - if (netmap_verbose) - D("done deleting %p", nmd); - NMA_LOCK_DESTROY(nmd); - free(nmd, M_DEVBUF); -} - -static int -netmap_mem_private_config(struct netmap_mem_d *nmd) -{ - /* nothing to do, we are configured on creation - * and configuration never changes thereafter - */ - return 0; -} - -static int -netmap_mem_private_finalize(struct netmap_mem_d *nmd) -{ - int err; - NMA_LOCK(nmd); - nmd->refcount++; - err = netmap_mem_finalize_all(nmd); - NMA_UNLOCK(nmd); - return err; - -} - -static void -netmap_mem_private_deref(struct netmap_mem_d *nmd) -{ - NMA_LOCK(nmd); - if (--nmd->refcount <= 0) - netmap_mem_reset_all(nmd); - NMA_UNLOCK(nmd); -} - - -/* - * allocator for private memory - */ -struct netmap_mem_d * -netmap_mem_private_new(const char *name, u_int txr, u_int txd, - u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr) -{ 
- struct netmap_mem_d *d = NULL; - struct netmap_obj_params p[NETMAP_POOLS_NR]; - int i, err; - u_int v, maxd; - - d = malloc(sizeof(struct netmap_mem_d), - M_DEVBUF, M_NOWAIT | M_ZERO); - if (d == NULL) { - err = ENOMEM; - goto error; - } - - *d = nm_blueprint; - - err = nm_mem_assign_id(d); - if (err) - goto error; - - /* account for the fake host rings */ - txr++; - rxr++; - - /* copy the min values */ - for (i = 0; i < NETMAP_POOLS_NR; i++) { - p[i] = netmap_min_priv_params[i]; - } - - /* possibly increase them to fit user request */ - v = sizeof(struct netmap_if) + sizeof(ssize_t) * (txr + rxr); - if (p[NETMAP_IF_POOL].size < v) - p[NETMAP_IF_POOL].size = v; - v = 2 + 4 * npipes; - if (p[NETMAP_IF_POOL].num < v) - p[NETMAP_IF_POOL].num = v; - maxd = (txd > rxd) ? txd : rxd; - v = sizeof(struct netmap_ring) + sizeof(struct netmap_slot) * maxd; - if (p[NETMAP_RING_POOL].size < v) - p[NETMAP_RING_POOL].size = v; - /* each pipe endpoint needs two tx rings (1 normal + 1 host, fake) - * and two rx rings (again, 1 normal and 1 fake host) - */ - v = txr + rxr + 8 * npipes; - if (p[NETMAP_RING_POOL].num < v) - p[NETMAP_RING_POOL].num = v; - /* for each pipe we only need the buffers for the 4 "real" rings. - * On the other end, the pipe ring dimension may be different from - * the parent port ring dimension. As a compromise, we allocate twice the - * space actually needed if the pipe rings were the same size as the parent rings - */ - v = (4 * npipes + rxr) * rxd + (4 * npipes + txr) * txd + 2 + extra_bufs; - /* the +2 is for the tx and rx fake buffers (indices 0 and 1) */ - if (p[NETMAP_BUF_POOL].num < v) - p[NETMAP_BUF_POOL].num = v; - - if (netmap_verbose) - D("req if %d*%d ring %d*%d buf %d*%d", - p[NETMAP_IF_POOL].num, - p[NETMAP_IF_POOL].size, - p[NETMAP_RING_POOL].num, - p[NETMAP_RING_POOL].size, - p[NETMAP_BUF_POOL].num, - p[NETMAP_BUF_POOL].size); - - for (i = 0; i < NETMAP_POOLS_NR; i++) { - snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ, - nm_blueprint.pools[i].name, - name); - err = netmap_config_obj_allocator(&d->pools[i], - p[i].num, p[i].size); - if (err) - goto error; - } - - d->flags &= ~NETMAP_MEM_FINALIZED; - - NMA_LOCK_INIT(d); - - return d; -error: - netmap_mem_private_delete(d); - if (perr) - *perr = err; - return NULL; -} - - -/* call with lock held */ -static int -netmap_mem_global_config(struct netmap_mem_d *nmd) -{ - int i; - - if (nmd->refcount) - /* already in use, we cannot change the configuration */ - goto out; - - if (!netmap_memory_config_changed(nmd)) - goto out; - - D("reconfiguring"); - - if (nmd->flags & NETMAP_MEM_FINALIZED) { - /* reset previous allocation */ - for (i = 0; i < NETMAP_POOLS_NR; i++) { - netmap_reset_obj_allocator(&nmd->pools[i]); - } - nmd->flags &= ~NETMAP_MEM_FINALIZED; - } - - for (i = 0; i < NETMAP_POOLS_NR; i++) { - nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i], - netmap_params[i].num, netmap_params[i].size); - if (nmd->lasterr) - goto out; - } - -out: - - return nmd->lasterr; -} - -static int -netmap_mem_global_finalize(struct netmap_mem_d *nmd) -{ - int err; - - NMA_LOCK(nmd); - - - /* update configuration if changed */ - if (netmap_mem_global_config(nmd)) - goto out; - - nmd->refcount++; - - if (nmd->flags & NETMAP_MEM_FINALIZED) { - /* may happen if config is not changed */ - ND("nothing to do"); - goto out; - } - - if (netmap_mem_finalize_all(nmd)) - goto out; - - /* backward compatibility */ - netmap_buf_size = nmd->pools[NETMAP_BUF_POOL]._objsize; - netmap_total_buffers = nmd->pools[NETMAP_BUF_POOL].objtotal; - - 
netmap_buffer_lut = nmd->pools[NETMAP_BUF_POOL].lut; - netmap_buffer_base = nmd->pools[NETMAP_BUF_POOL].lut[0].vaddr; - - nmd->lasterr = 0; - -out: - if (nmd->lasterr) - nmd->refcount--; - err = nmd->lasterr; - - NMA_UNLOCK(nmd); - - return err; - -} - -int -netmap_mem_init(void) -{ - NMA_LOCK_INIT(&nm_mem); - return (0); -} - -void -netmap_mem_fini(void) -{ - int i; - - for (i = 0; i < NETMAP_POOLS_NR; i++) { - netmap_destroy_obj_allocator(&nm_mem.pools[i]); - } - NMA_LOCK_DESTROY(&nm_mem); -} - -static void -netmap_free_rings(struct netmap_adapter *na) -{ - struct netmap_kring *kring; - struct netmap_ring *ring; - if (!na->tx_rings) - return; - for (kring = na->tx_rings; kring != na->rx_rings; kring++) { - ring = kring->ring; - if (ring == NULL) - continue; - netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); - netmap_ring_free(na->nm_mem, ring); - kring->ring = NULL; - } - for (/* cont'd from above */; kring != na->tailroom; kring++) { - ring = kring->ring; - if (ring == NULL) - continue; - netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); - netmap_ring_free(na->nm_mem, ring); - kring->ring = NULL; - } -} - -/* call with NMA_LOCK held * - * - * Allocate netmap rings and buffers for this card - * The rings are contiguous, but have variable size. - * The kring array must follow the layout described - * in netmap_krings_create(). - */ -int -netmap_mem_rings_create(struct netmap_adapter *na) -{ - struct netmap_ring *ring; - u_int len, ndesc; - struct netmap_kring *kring; - u_int i; - - NMA_LOCK(na->nm_mem); - - /* transmit rings */ - for (i =0, kring = na->tx_rings; kring != na->rx_rings; kring++, i++) { - if (kring->ring) { - ND("%s %ld already created", kring->name, kring - na->tx_rings); - continue; /* already created by somebody else */ - } - ndesc = kring->nkr_num_slots; - len = sizeof(struct netmap_ring) + - ndesc * sizeof(struct netmap_slot); - ring = netmap_ring_malloc(na->nm_mem, len); - if (ring == NULL) { - D("Cannot allocate tx_ring"); - goto cleanup; - } - ND("txring at %p", ring); - kring->ring = ring; - *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; - *(int64_t *)(uintptr_t)&ring->buf_ofs = - (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + - na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - - netmap_ring_offset(na->nm_mem, ring); - - /* copy values from kring */ - ring->head = kring->rhead; - ring->cur = kring->rcur; - ring->tail = kring->rtail; - *(uint16_t *)(uintptr_t)&ring->nr_buf_size = - NETMAP_BDG_BUF_SIZE(na->nm_mem); - ND("%s h %d c %d t %d", kring->name, - ring->head, ring->cur, ring->tail); - ND("initializing slots for txring"); - if (i != na->num_tx_rings || (na->na_flags & NAF_HOST_RINGS)) { - /* this is a real ring */ - if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { - D("Cannot allocate buffers for tx_ring"); - goto cleanup; - } - } else { - /* this is a fake tx ring, set all indices to 0 */ - netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0); - } - } - - /* receive rings */ - for ( i = 0 /* kring cont'd from above */ ; kring != na->tailroom; kring++, i++) { - if (kring->ring) { - ND("%s %ld already created", kring->name, kring - na->rx_rings); - continue; /* already created by somebody else */ - } - ndesc = kring->nkr_num_slots; - len = sizeof(struct netmap_ring) + - ndesc * sizeof(struct netmap_slot); - ring = netmap_ring_malloc(na->nm_mem, len); - if (ring == NULL) { - D("Cannot allocate rx_ring"); - goto cleanup; - } - ND("rxring at %p", ring); - kring->ring = ring; - *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; - 
*(int64_t *)(uintptr_t)&ring->buf_ofs = - (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + - na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - - netmap_ring_offset(na->nm_mem, ring); - - /* copy values from kring */ - ring->head = kring->rhead; - ring->cur = kring->rcur; - ring->tail = kring->rtail; - *(int *)(uintptr_t)&ring->nr_buf_size = - NETMAP_BDG_BUF_SIZE(na->nm_mem); - ND("%s h %d c %d t %d", kring->name, - ring->head, ring->cur, ring->tail); - ND("initializing slots for rxring %p", ring); - if (i != na->num_rx_rings || (na->na_flags & NAF_HOST_RINGS)) { - /* this is a real ring */ - if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { - D("Cannot allocate buffers for rx_ring"); - goto cleanup; - } - } else { - /* this is a fake rx ring, set all indices to 1 */ - netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 1); - } - } - - NMA_UNLOCK(na->nm_mem); - - return 0; - -cleanup: - netmap_free_rings(na); - - NMA_UNLOCK(na->nm_mem); - - return ENOMEM; -} - -void -netmap_mem_rings_delete(struct netmap_adapter *na) -{ - /* last instance, release bufs and rings */ - NMA_LOCK(na->nm_mem); - - netmap_free_rings(na); - - NMA_UNLOCK(na->nm_mem); -} - - -/* call with NMA_LOCK held */ -/* - * Allocate the per-fd structure netmap_if. - * - * We assume that the configuration stored in na - * (number of tx/rx rings and descs) does not change while - * the interface is in netmap mode. - */ -struct netmap_if * -netmap_mem_if_new(const char *ifname, struct netmap_adapter *na) -{ - struct netmap_if *nifp; - ssize_t base; /* handy for relative offsets between rings and nifp */ - u_int i, len, ntx, nrx; - - /* account for the (eventually fake) host rings */ - ntx = na->num_tx_rings + 1; - nrx = na->num_rx_rings + 1; - /* - * the descriptor is followed inline by an array of offsets - * to the tx and rx rings in the shared memory region. - */ - - NMA_LOCK(na->nm_mem); - - len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t); - nifp = netmap_if_malloc(na->nm_mem, len); - if (nifp == NULL) { - NMA_UNLOCK(na->nm_mem); - return NULL; - } - - /* initialize base fields -- override const */ - *(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings; - *(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings; - strncpy(nifp->ni_name, ifname, (size_t)IFNAMSIZ); - - /* - * fill the slots for the rx and tx rings. They contain the offset - * between the ring and nifp, so the information is usable in - * userspace to reach the ring from the nifp. 
- */ - base = netmap_if_offset(na->nm_mem, nifp); - for (i = 0; i < ntx; i++) { - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = - netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base; - } - for (i = 0; i < nrx; i++) { - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+ntx] = - netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base; - } - - NMA_UNLOCK(na->nm_mem); - - return (nifp); -} - -void -netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) -{ - if (nifp == NULL) - /* nothing to do */ - return; - NMA_LOCK(na->nm_mem); - if (nifp->ni_bufs_head) - netmap_extra_free(na, nifp->ni_bufs_head); - netmap_if_free(na->nm_mem, nifp); - - NMA_UNLOCK(na->nm_mem); -} - -static void -netmap_mem_global_deref(struct netmap_mem_d *nmd) -{ - NMA_LOCK(nmd); - - nmd->refcount--; - if (netmap_verbose) - D("refcount = %d", nmd->refcount); - - NMA_UNLOCK(nmd); -} - -int -netmap_mem_finalize(struct netmap_mem_d *nmd) -{ - return nmd->finalize(nmd); -} - -void -netmap_mem_deref(struct netmap_mem_d *nmd) -{ - return nmd->deref(nmd); -} diff --git a/netmap/sys/dev/netmap/netmap_mem2.h b/netmap/sys/dev/netmap/netmap_mem2.h deleted file mode 100644 index 04248f2..0000000 --- a/netmap/sys/dev/netmap/netmap_mem2.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/dev/netmap/netmap_mem2.c 234290 2012-04-14 16:44:18Z luigi $ - * - * (New) memory allocator for netmap - */ - -/* - * This allocator creates three memory pools: - * nm_if_pool for the struct netmap_if - * nm_ring_pool for the struct netmap_ring - * nm_buf_pool for the packet buffers. - * - * that contain netmap objects. Each pool is made of a number of clusters, - * multiple of a page size, each containing an integer number of objects. - * The clusters are contiguous in user space but not in the kernel. - * Only nm_buf_pool needs to be dma-able, - * but for convenience use the same type of allocator for all. - * - * Once mapped, the three pools are exported to userspace - * as a contiguous block, starting from nm_if_pool. Each - * cluster (and pool) is an integral number of pages. - * [ . . . ][ . . . . . .][ . . . . . . . . . .] 
- *    nm_if     nm_ring            nm_buf
- *
- * The userspace areas contain offsets of the objects in userspace.
- * When (at init time) we write these offsets, we find out the index
- * of the object, and from there locate the offset from the beginning
- * of the region.
- *
- * The individual allocators manage a pool of memory for objects of
- * the same size.
- * The pool is split into smaller clusters, whose size is a
- * multiple of the page size. The cluster size is chosen
- * to minimize the waste for a given max cluster size
- * (we do it by brute force, as we have relatively few objects
- * per cluster).
- *
- * Objects are aligned to the cache line (64 bytes) rounding up object
- * sizes when needed. A bitmap contains the state of each object.
- * Allocation scans the bitmap; this is done only on attach, so we are not
- * too worried about performance.
- *
- * For each allocator we can define (through sysctl) the size and
- * number of each object. Memory is allocated at the first use of a
- * netmap file descriptor, and can be freed when all such descriptors
- * have been released (including unmapping the memory).
- * If memory is scarce, the system tries to get as much as possible
- * and the sysctl values reflect the actual allocation.
- * Together with the desired values, the sysctls also export the absolute
- * minimum and maximum values that cannot be overridden.
- *
- * struct netmap_if:
- * variable size, max 16 bytes per ring pair plus some fixed amount.
- * 1024 bytes should be large enough in practice.
- *
- * In the worst case we have one netmap_if per ring in the system.
- *
- * struct netmap_ring
- * variable size, 8 bytes per slot plus some fixed amount.
- * Rings can be large (e.g. 4k slots, or >32Kbytes).
- * We default to 36 KB (9 pages), and a few hundred rings.
- *
- * struct netmap_buffer
- * The more the better, both because fast interfaces tend to have
- * many slots, and because we may want to use buffers to store
- * packets in userspace avoiding copies.
- * Must contain a full frame (e.g. 1518, or more for vlans, jumbo
- * frames etc.) plus be nicely aligned, plus some NICs restrict
- * the size to a multiple of 1K or so. Default to 2K.
- */
-#ifndef _NET_NETMAP_MEM2_H_
-#define _NET_NETMAP_MEM2_H_
-
-
-#define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */
-
-#define NETMAP_POOL_MAX_NAMSZ 32
-
-
-enum {
-	NETMAP_IF_POOL = 0,
-	NETMAP_RING_POOL,
-	NETMAP_BUF_POOL,
-	NETMAP_POOLS_NR
-};
-
-
-struct netmap_obj_params {
-	u_int size;
-	u_int num;
-};
-struct netmap_obj_pool {
-	char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */
-
-	/* ---------------------------------------------------*/
-	/* these are only meaningful if the pool is finalized */
-	/* (see 'finalized' field in netmap_mem_d) */
-	u_int objtotal;     /* actual total number of objects. */
-	u_int memtotal;     /* actual total memory space */
-	u_int numclusters;  /* actual number of clusters */
-
-	u_int objfree;      /* number of free objects.
*/ - - struct lut_entry *lut; /* virt,phys addresses, objtotal entries */ - uint32_t *bitmap; /* one bit per buffer, 1 means free */ - uint32_t bitmap_slots; /* number of uint32 entries in bitmap */ - /* ---------------------------------------------------*/ - - /* limits */ - u_int objminsize; /* minimum object size */ - u_int objmaxsize; /* maximum object size */ - u_int nummin; /* minimum number of objects */ - u_int nummax; /* maximum number of objects */ - - /* these are changed only by config */ - u_int _objtotal; /* total number of objects */ - u_int _objsize; /* object size */ - u_int _clustsize; /* cluster size */ - u_int _clustentries; /* objects per cluster */ - u_int _numclusters; /* number of clusters */ - - /* requested values */ - u_int r_objtotal; - u_int r_objsize; -}; - -#ifdef linux -// XXX a mtx would suffice here 20130415 lr -#define NMA_LOCK_T struct semaphore -#else /* !linux */ -#define NMA_LOCK_T struct mtx -#endif /* linux */ - -typedef int (*netmap_mem_config_t)(struct netmap_mem_d*); -typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*); -typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*); - -typedef uint16_t nm_memid_t; - -/* We implement two kinds of netmap_mem_d structures: - * - * - global: used by hardware NICS; - * - * - private: used by VALE ports. - * - * In both cases, the netmap_mem_d structure has the same lifetime as the - * netmap_adapter of the corresponding NIC or port. It is the responsibility of - * the client code to delete the private allocator when the associated - * netmap_adapter is freed (this is implemented by the NAF_MEM_OWNER flag in - * netmap.c). The 'refcount' field counts the number of active users of the - * structure. The global allocator uses this information to prevent/allow - * reconfiguration. The private allocators release all their memory when there - * are no active users. By 'active user' we mean an existing netmap_priv - * structure holding a reference to the allocator. 
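As an illustration of the ring_ofs[] offsets written in netmap_mem_if_new() at the top of this hunk, here is a minimal userspace sketch of how a client locates a ring after mapping the device. It assumes <net/netmap.h>, <net/netmap_user.h>, <sys/mman.h> and the usual ioctl/unistd headers, uses a made-up interface name, and omits error handling; this is essentially what the NETMAP_TXRING()/NETMAP_RXRING() helpers expand to, although the exact index layout for RX and host rings may differ:

    int fd = open("/dev/netmap", O_RDWR);
    struct nmreq req;

    memset(&req, 0, sizeof(req));
    req.nr_version = NETMAP_API;
    strncpy(req.nr_name, "em0", sizeof(req.nr_name) - 1);  /* hypothetical NIC */
    ioctl(fd, NIOCREGIF, &req);

    /* map the shared region; the netmap_if sits at nr_offset */
    char *mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
                     MAP_SHARED, fd, 0);
    struct netmap_if *nifp = (struct netmap_if *)(mem + req.nr_offset);

    /* each ring_ofs[] entry is relative to the netmap_if itself,
     * exactly as computed in netmap_mem_if_new() above */
    struct netmap_ring *txr =
        (struct netmap_ring *)((char *)nifp + nifp->ring_ofs[0]);
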
- */ -struct netmap_mem_d { - NMA_LOCK_T nm_mtx; /* protect the allocator */ - u_int nm_totalsize; /* shorthand */ - - u_int flags; -#define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */ -#define NETMAP_MEM_PRIVATE 0x2 /* uses private address space */ - int lasterr; /* last error for curr config */ - int refcount; /* existing priv structures */ - /* the three allocators */ - struct netmap_obj_pool pools[NETMAP_POOLS_NR]; - - netmap_mem_config_t config; - netmap_mem_finalize_t finalize; - netmap_mem_deref_t deref; - - nm_memid_t nm_id; /* allocator identifier */ - - /* list of all existing allocators, sorted by nm_id */ - struct netmap_mem_d *prev, *next; -}; - -extern struct netmap_mem_d nm_mem; - -vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t); -int netmap_mem_finalize(struct netmap_mem_d *); -int netmap_mem_init(void); -void netmap_mem_fini(void); -struct netmap_if * - netmap_mem_if_new(const char *, struct netmap_adapter *); -void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *); -int netmap_mem_rings_create(struct netmap_adapter *); -void netmap_mem_rings_delete(struct netmap_adapter *); -void netmap_mem_deref(struct netmap_mem_d *); -int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id); -ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr); -struct netmap_mem_d* netmap_mem_private_new(const char *name, - u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, - int* error); -void netmap_mem_private_delete(struct netmap_mem_d *); - -#define NETMAP_BDG_BUF_SIZE(n) ((n)->pools[NETMAP_BUF_POOL]._objsize) - -uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n); - - -#endif diff --git a/netmap/sys/dev/netmap/netmap_offloadings.c b/netmap/sys/dev/netmap/netmap_offloadings.c deleted file mode 100644 index 3e1ecb6..0000000 --- a/netmap/sys/dev/netmap/netmap_offloadings.c +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright (C) 2014 Vincenzo Maffione. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* $FreeBSD: head/sys/dev/netmap/netmap_offloadings.c 261909 2014-02-15 04:53:04Z luigi $ */ - -#if defined(__FreeBSD__) -#include /* prerequisite */ - -#include -#include -#include /* defines used in kernel.h */ -#include /* types used in module initialization */ -#include -#include /* struct socket */ -#include /* sockaddrs */ -#include -#include -#include /* bus_dmamap_* */ -#include - -#elif defined(linux) - -#include "bsd_glue.h" - -#elif defined(__APPLE__) - -#warning OSX support is only partial -#include "osx_glue.h" - -#else - -#error Unsupported platform - -#endif /* unsupported */ - -#include -#include - - - -/* This routine is called by bdg_mismatch_datapath() when it finishes - * accumulating bytes for a segment, in order to fix some fields in the - * segment headers (which still contain the same content as the header - * of the original GSO packet). 'buf' points to the beginning (e.g. - * the ethernet header) of the segment, and 'len' is its length. - */ -static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx, - u_int segmented_bytes, u_int last_segment, - u_int tcp, u_int iphlen) -{ - struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14); - struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14); - uint16_t *check = NULL; - uint8_t *check_data = NULL; - - if (iphlen == 20) { - /* Set the IPv4 "Total Length" field. */ - iph->tot_len = htobe16(len-14); - ND("ip total length %u", be16toh(ip->tot_len)); - - /* Set the IPv4 "Identification" field. */ - iph->id = htobe16(be16toh(iph->id) + idx); - ND("ip identification %u", be16toh(iph->id)); - - /* Compute and insert the IPv4 header checksum. */ - iph->check = 0; - iph->check = nm_csum_ipv4(iph); - ND("IP csum %x", be16toh(iph->check)); - } else {/* if (iphlen == 40) */ - /* Set the IPv6 "Payload Len" field. */ - ip6h->payload_len = htobe16(len-14-iphlen); - } - - if (tcp) { - struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen); - - /* Set the TCP sequence number. */ - tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes); - ND("tcp seq %u", be32toh(tcph->seq)); - - /* Zero the PSH and FIN TCP flags if this is not the last - segment. */ - if (!last_segment) - tcph->flags &= ~(0x8 | 0x1); - ND("last_segment %u", last_segment); - - check = &tcph->check; - check_data = (uint8_t *)tcph; - } else { /* UDP */ - struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen); - - /* Set the UDP 'Length' field. */ - udph->len = htobe16(len-14-iphlen); - - check = &udph->check; - check_data = (uint8_t *)udph; - } - - /* Compute and insert TCP/UDP checksum. */ - *check = 0; - if (iphlen == 20) - nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check); - else - nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check); - - ND("TCP/UDP csum %x", be16toh(*check)); -} - - -/* The VALE mismatch datapath implementation. */ -void bdg_mismatch_datapath(struct netmap_vp_adapter *na, - struct netmap_vp_adapter *dst_na, - struct nm_bdg_fwd *ft_p, struct netmap_ring *ring, - u_int *j, u_int lim, u_int *howmany) -{ - struct netmap_slot *slot = NULL; - struct nm_vnet_hdr *vh = NULL; - /* Number of source slots to process. */ - u_int frags = ft_p->ft_frags; - struct nm_bdg_fwd *ft_end = ft_p + frags; - - /* Source and destination pointers. */ - uint8_t *dst, *src; - size_t src_len, dst_len; - - u_int j_start = *j; - u_int dst_slots = 0; - - /* If the source port uses the offloadings, while destination doesn't, - * we grab the source virtio-net header and do the offloadings here. 
- */ - if (na->virt_hdr_len && !dst_na->virt_hdr_len) { - vh = (struct nm_vnet_hdr *)ft_p->ft_buf; - } - - /* Init source and dest pointers. */ - src = ft_p->ft_buf; - src_len = ft_p->ft_len; - slot = &ring->slot[*j]; - dst = BDG_NMB(&dst_na->up, slot); - dst_len = src_len; - - /* We are processing the first input slot and there is a mismatch - * between source and destination virt_hdr_len (SHL and DHL). - * When the a client is using virtio-net headers, the header length - * can be: - * - 10: the header corresponds to the struct nm_vnet_hdr - * - 12: the first 10 bytes correspond to the struct - * virtio_net_hdr, and the last 2 bytes store the - * "mergeable buffers" info, which is an optional - * hint that can be zeroed for compability - * - * The destination header is therefore built according to the - * following table: - * - * SHL | DHL | destination header - * ----------------------------- - * 0 | 10 | zero - * 0 | 12 | zero - * 10 | 0 | doesn't exist - * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero - * 12 | 0 | doesn't exist - * 12 | 10 | copied from the first 10 bytes of source header - */ - bzero(dst, dst_na->virt_hdr_len); - if (na->virt_hdr_len && dst_na->virt_hdr_len) - memcpy(dst, src, sizeof(struct nm_vnet_hdr)); - /* Skip the virtio-net headers. */ - src += na->virt_hdr_len; - src_len -= na->virt_hdr_len; - dst += dst_na->virt_hdr_len; - dst_len = dst_na->virt_hdr_len + src_len; - - /* Here it could be dst_len == 0 (which implies src_len == 0), - * so we avoid passing a zero length fragment. - */ - if (dst_len == 0) { - ft_p++; - src = ft_p->ft_buf; - src_len = ft_p->ft_len; - dst_len = src_len; - } - - if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) { - u_int gso_bytes = 0; - /* Length of the GSO packet header. */ - u_int gso_hdr_len = 0; - /* Pointer to the GSO packet header. Assume it is in a single fragment. */ - uint8_t *gso_hdr = NULL; - /* Index of the current segment. */ - u_int gso_idx = 0; - /* Payload data bytes segmented so far (e.g. TCP data bytes). */ - u_int segmented_bytes = 0; - /* Length of the IP header (20 if IPv4, 40 if IPv6). */ - u_int iphlen = 0; - /* Is this a TCP or an UDP GSO packet? */ - u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) - == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1; - - /* Segment the GSO packet contained into the input slots (frags). */ - while (ft_p != ft_end) { - size_t copy; - - /* Grab the GSO header if we don't have it. */ - if (!gso_hdr) { - uint16_t ethertype; - - gso_hdr = src; - - /* Look at the 'Ethertype' field to see if this packet - * is IPv4 or IPv6. - */ - ethertype = be16toh(*((uint16_t *)(gso_hdr + 12))); - if (ethertype == 0x0800) - iphlen = 20; - else /* if (ethertype == 0x86DD) */ - iphlen = 40; - ND(3, "type=%04x", ethertype); - - /* Compute gso_hdr_len. For TCP we need to read the - * content of the 'Data Offset' field. - */ - if (tcp) { - struct nm_tcphdr *tcph = - (struct nm_tcphdr *)&gso_hdr[14+iphlen]; - - gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4); - } else - gso_hdr_len = 14 + iphlen + 8; /* UDP */ - - ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len, - dst_na->mfs); - - /* Advance source pointers. */ - src += gso_hdr_len; - src_len -= gso_hdr_len; - if (src_len == 0) { - ft_p++; - if (ft_p == ft_end) - break; - src = ft_p->ft_buf; - src_len = ft_p->ft_len; - continue; - } - } - - /* Fill in the header of the current segment. */ - if (gso_bytes == 0) { - memcpy(dst, gso_hdr, gso_hdr_len); - gso_bytes = gso_hdr_len; - } - - /* Fill in data and update source and dest pointers. 
*/ - copy = src_len; - if (gso_bytes + copy > dst_na->mfs) - copy = dst_na->mfs - gso_bytes; - memcpy(dst + gso_bytes, src, copy); - gso_bytes += copy; - src += copy; - src_len -= copy; - - /* A segment is complete or we have processed all the - the GSO payload bytes. */ - if (gso_bytes >= dst_na->mfs || - (src_len == 0 && ft_p + 1 == ft_end)) { - /* After raw segmentation, we must fix some header - * fields and compute checksums, in a protocol dependent - * way. */ - gso_fix_segment(dst, gso_bytes, gso_idx, - segmented_bytes, - src_len == 0 && ft_p + 1 == ft_end, - tcp, iphlen); - - ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes); - slot->len = gso_bytes; - slot->flags = 0; - segmented_bytes += gso_bytes - gso_hdr_len; - - dst_slots++; - - /* Next destination slot. */ - *j = nm_next(*j, lim); - slot = &ring->slot[*j]; - dst = BDG_NMB(&dst_na->up, slot); - - gso_bytes = 0; - gso_idx++; - } - - /* Next input slot. */ - if (src_len == 0) { - ft_p++; - if (ft_p == ft_end) - break; - src = ft_p->ft_buf; - src_len = ft_p->ft_len; - } - } - ND(3, "%d bytes segmented", segmented_bytes); - - } else { - /* Address of a checksum field into a destination slot. */ - uint16_t *check = NULL; - /* Accumulator for an unfolded checksum. */ - rawsum_t csum = 0; - - /* Process a non-GSO packet. */ - - /* Init 'check' if necessary. */ - if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { - if (unlikely(vh->csum_offset + vh->csum_start > src_len)) - D("invalid checksum request"); - else - check = (uint16_t *)(dst + vh->csum_start + - vh->csum_offset); - } - - while (ft_p != ft_end) { - /* Init/update the packet checksum if needed. */ - if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { - if (!dst_slots) - csum = nm_csum_raw(src + vh->csum_start, - src_len - vh->csum_start, 0); - else - csum = nm_csum_raw(src, src_len, csum); - } - - /* Round to a multiple of 64 */ - src_len = (src_len + 63) & ~63; - - if (ft_p->ft_flags & NS_INDIRECT) { - if (copyin(src, dst, src_len)) { - /* Invalid user pointer, pretend len is 0. */ - dst_len = 0; - } - } else { - memcpy(dst, src, (int)src_len); - } - slot->len = dst_len; - - dst_slots++; - - /* Next destination slot. */ - *j = nm_next(*j, lim); - slot = &ring->slot[*j]; - dst = BDG_NMB(&dst_na->up, slot); - - /* Next source slot. */ - ft_p++; - src = ft_p->ft_buf; - dst_len = src_len = ft_p->ft_len; - - } - - /* Finalize (fold) the checksum if needed. */ - if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { - *check = nm_csum_fold(csum); - } - ND(3, "using %u dst_slots", dst_slots); - - /* A second pass on the desitations slots to set the slot flags, - * using the right number of destination slots. - */ - while (j_start != *j) { - slot = &ring->slot[j_start]; - slot->flags = (dst_slots << 8)| NS_MOREFRAG; - j_start = nm_next(j_start, lim); - } - /* Clear NS_MOREFRAG flag on last entry. */ - slot->flags = (dst_slots << 8); - } - - /* Update howmany. */ - if (unlikely(dst_slots > *howmany)) { - dst_slots = *howmany; - D("Slot allocation error: Should never happen"); - } - *howmany -= dst_slots; -} diff --git a/netmap/sys/dev/netmap/netmap_pipe.c b/netmap/sys/dev/netmap/netmap_pipe.c deleted file mode 100644 index 3434e70..0000000 --- a/netmap/sys/dev/netmap/netmap_pipe.c +++ /dev/null @@ -1,708 +0,0 @@ -/* - * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -#if defined(__FreeBSD__) -#include /* prerequisite */ - -#include -#include -#include /* defines used in kernel.h */ -#include /* types used in module initialization */ -#include -#include -#include -#include -#include -#include -#include /* sockaddrs */ -#include -#include -#include /* bus_dmamap_* */ -#include - - -#elif defined(linux) - -#include "bsd_glue.h" - -#elif defined(__APPLE__) - -#warning OSX support is only partial -#include "osx_glue.h" - -#else - -#error Unsupported platform - -#endif /* unsupported */ - -/* - * common headers - */ - -#include -#include -#include - -#ifdef WITH_PIPES - -#define NM_PIPE_MAXSLOTS 4096 - -int netmap_default_pipes = 0; /* default number of pipes for each nic */ -SYSCTL_DECL(_dev_netmap); -SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , ""); - -/* allocate the pipe array in the parent adapter */ -int -netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr) -{ - size_t len; - int mode = nmr->nr_flags & NR_REG_MASK; - u_int npipes; - - if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) { - /* this is for our parent, not for us */ - return 0; - } - - /* TODO: we can resize the array if the new - * request can accomodate the already existing pipes - */ - if (na->na_pipes) { - nmr->nr_arg1 = na->na_max_pipes; - return 0; - } - - npipes = nmr->nr_arg1; - if (npipes == 0) - npipes = netmap_default_pipes; - nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL); - - if (npipes == 0) { - /* really zero, nothing to alloc */ - goto out; - } - - len = sizeof(struct netmap_pipe_adapter *) * npipes; - na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); - if (na->na_pipes == NULL) - return ENOMEM; - - na->na_max_pipes = npipes; - na->na_next_pipe = 0; - -out: - nmr->nr_arg1 = npipes; - - return 0; -} - -/* deallocate the parent array in the parent adapter */ -void -netmap_pipe_dealloc(struct netmap_adapter *na) -{ - if (na->na_pipes) { - ND("freeing pipes for %s", NM_IFPNAME(na->ifp)); - free(na->na_pipes, M_DEVBUF); - na->na_pipes = NULL; - na->na_max_pipes = 0; - na->na_next_pipe = 0; - } -} - -/* find a pipe endpoint with the given id among the parent's pipes */ -static struct netmap_pipe_adapter * -netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id) -{ - int i; - struct netmap_pipe_adapter *na; - - for (i = 0; i < parent->na_next_pipe; i++) { - na = parent->na_pipes[i]; - 
if (na->id == pipe_id) { - return na; - } - } - return NULL; -} - -/* add a new pipe endpoint to the parent array */ -static int -netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na) -{ - if (parent->na_next_pipe >= parent->na_max_pipes) { - D("%s: no space left for pipes", NM_IFPNAME(parent->ifp)); - return ENOMEM; - } - - parent->na_pipes[parent->na_next_pipe] = na; - na->parent_slot = parent->na_next_pipe; - parent->na_next_pipe++; - return 0; -} - -/* remove the given pipe endpoint from the parent array */ -static void -netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na) -{ - u_int n; - n = --parent->na_next_pipe; - if (n != na->parent_slot) { - parent->na_pipes[na->parent_slot] = - parent->na_pipes[n]; - } - parent->na_pipes[n] = NULL; -} - -static int -netmap_pipe_txsync(struct netmap_kring *txkring, int flags) -{ - struct netmap_kring *rxkring = txkring->pipe; - u_int limit; /* slots to transfer */ - u_int j, k, lim_tx = txkring->nkr_num_slots - 1, - lim_rx = rxkring->nkr_num_slots - 1; - int m, busy; - - ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name); - ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail, - txkring->rcur, txkring->rhead, txkring->rtail); - - j = rxkring->nr_hwtail; /* RX */ - k = txkring->nr_hwcur; /* TX */ - m = txkring->rhead - txkring->nr_hwcur; /* new slots */ - if (m < 0) - m += txkring->nkr_num_slots; - limit = m; - m = rxkring->nkr_num_slots - 1; /* max avail space on destination */ - busy = j - rxkring->nr_hwcur; /* busy slots */ - if (busy < 0) - busy += txkring->nkr_num_slots; - m -= busy; /* subtract busy slots */ - ND(2, "m %d limit %d", m, limit); - if (m < limit) - limit = m; - - if (limit == 0) { - /* either the rxring is full, or nothing to send */ - nm_txsync_finalize(txkring); /* actually useless */ - return 0; - } - - while (limit-- > 0) { - struct netmap_slot *rs = &rxkring->save_ring->slot[j]; - struct netmap_slot *ts = &txkring->ring->slot[k]; - struct netmap_slot tmp; - - /* swap the slots */ - tmp = *rs; - *rs = *ts; - *ts = tmp; - - /* no need to report the buffer change */ - - j = nm_next(j, lim_rx); - k = nm_next(k, lim_tx); - } - - wmb(); /* make sure the slots are updated before publishing them */ - rxkring->nr_hwtail = j; - txkring->nr_hwcur = k; - txkring->nr_hwtail = nm_prev(k, lim_tx); - - nm_txsync_finalize(txkring); - ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail, - txkring->rcur, txkring->rhead, txkring->rtail, j); - - wmb(); /* make sure rxkring->nr_hwtail is updated before notifying */ - rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0); - - return 0; -} - -static int -netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags) -{ - struct netmap_kring *txkring = rxkring->pipe; - uint32_t oldhwcur = rxkring->nr_hwcur; - - ND("%s %x <- %s", rxkring->name, flags, txkring->name); - rxkring->nr_hwcur = rxkring->rhead; /* recover user-relased slots */ - ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail, - rxkring->rcur, rxkring->rhead, rxkring->rtail); - rmb(); /* paired with the first wmb() in txsync */ - nm_rxsync_finalize(rxkring); - - if (oldhwcur != rxkring->nr_hwcur) { - /* we have released some slots, notify the other end */ - wmb(); /* make sure nr_hwcur is updated before notifying */ - txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0); - } - return 0; -} - -/* Pipe endpoints are 
created and destroyed together, so that endopoints do not - * have to check for the existence of their peer at each ?xsync. - * - * To play well with the existing netmap infrastructure (refcounts etc.), we - * adopt the following strategy: - * - * 1) The first endpoint that is created also creates the other endpoint and - * grabs a reference to it. - * - * state A) user1 --> endpoint1 --> endpoint2 - * - * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives - * its reference to the user: - * - * state B) user1 --> endpoint1 endpoint2 <--- user2 - * - * 3) Assume that, starting from state B endpoint2 is closed. In the unregister - * callback endpoint2 notes that endpoint1 is still active and adds a reference - * from endpoint1 to itself. When user2 then releases her own reference, - * endpoint2 is not destroyed and we are back to state A. A symmetrical state - * would be reached if endpoint1 were released instead. - * - * 4) If, starting from state A, endpoint1 is closed, the destructor notes that - * it owns a reference to endpoint2 and releases it. - * - * Something similar goes on for the creation and destruction of the krings. - */ - - -/* netmap_pipe_krings_delete. - * - * There are two cases: - * - * 1) state is - * - * usr1 --> e1 --> e2 - * - * and we are e1. We have to create both sets - * of krings. - * - * 2) state is - * - * usr1 --> e1 --> e2 - * - * and we are e2. e1 is certainly registered and our - * krings already exist, but they may be hidden. - */ -static int -netmap_pipe_krings_create(struct netmap_adapter *na) -{ - struct netmap_pipe_adapter *pna = - (struct netmap_pipe_adapter *)na; - struct netmap_adapter *ona = &pna->peer->up; - int error = 0; - if (pna->peer_ref) { - int i; - - /* case 1) above */ - D("%p: case 1, create everything", na); - error = netmap_krings_create(na, 0); - if (error) - goto err; - - /* we also create all the rings, since we need to - * update the save_ring pointers. 
- * netmap_mem_rings_create (called by our caller) - * will not create the rings again - */ - - error = netmap_mem_rings_create(na); - if (error) - goto del_krings1; - - /* update our hidden ring pointers */ - for (i = 0; i < na->num_tx_rings + 1; i++) - na->tx_rings[i].save_ring = na->tx_rings[i].ring; - for (i = 0; i < na->num_rx_rings + 1; i++) - na->rx_rings[i].save_ring = na->rx_rings[i].ring; - - /* now, create krings and rings of the other end */ - error = netmap_krings_create(ona, 0); - if (error) - goto del_rings1; - - error = netmap_mem_rings_create(ona); - if (error) - goto del_krings2; - - for (i = 0; i < ona->num_tx_rings + 1; i++) - ona->tx_rings[i].save_ring = ona->tx_rings[i].ring; - for (i = 0; i < ona->num_rx_rings + 1; i++) - ona->rx_rings[i].save_ring = ona->rx_rings[i].ring; - - /* cross link the krings */ - for (i = 0; i < na->num_tx_rings; i++) { - na->tx_rings[i].pipe = pna->peer->up.rx_rings + i; - na->rx_rings[i].pipe = pna->peer->up.tx_rings + i; - pna->peer->up.tx_rings[i].pipe = na->rx_rings + i; - pna->peer->up.rx_rings[i].pipe = na->tx_rings + i; - } - } else { - int i; - /* case 2) above */ - /* recover the hidden rings */ - ND("%p: case 2, hidden rings", na); - for (i = 0; i < na->num_tx_rings + 1; i++) - na->tx_rings[i].ring = na->tx_rings[i].save_ring; - for (i = 0; i < na->num_rx_rings + 1; i++) - na->rx_rings[i].ring = na->rx_rings[i].save_ring; - } - return 0; - -del_krings2: - netmap_krings_delete(ona); -del_rings1: - netmap_mem_rings_delete(na); -del_krings1: - netmap_krings_delete(na); -err: - return error; -} - -/* netmap_pipe_reg. - * - * There are two cases on registration (onoff==1) - * - * 1.a) state is - * - * usr1 --> e1 --> e2 - * - * and we are e1. Nothing special to do. - * - * 1.b) state is - * - * usr1 --> e1 --> e2 <-- usr2 - * - * and we are e2. Drop the ref e1 is holding. - * - * There are two additional cases on unregister (onoff==0) - * - * 2.a) state is - * - * usr1 --> e1 --> e2 - * - * and we are e1. Nothing special to do, e2 will - * be cleaned up by the destructor of e1. - * - * 2.b) state is - * - * usr1 --> e1 e2 <-- usr2 - * - * and we are either e1 or e2. Add a ref from the - * other end and hide our rings. - */ -static int -netmap_pipe_reg(struct netmap_adapter *na, int onoff) -{ - struct netmap_pipe_adapter *pna = - (struct netmap_pipe_adapter *)na; - struct ifnet *ifp = na->ifp; - ND("%p: onoff %d", na, onoff); - if (onoff) { - ifp->if_capenable |= IFCAP_NETMAP; - } else { - ifp->if_capenable &= ~IFCAP_NETMAP; - } - if (pna->peer_ref) { - ND("%p: case 1.a or 2.a, nothing to do", na); - return 0; - } - if (onoff) { - ND("%p: case 1.b, drop peer", na); - pna->peer->peer_ref = 0; - netmap_adapter_put(na); - } else { - int i; - ND("%p: case 2.b, grab peer", na); - netmap_adapter_get(na); - pna->peer->peer_ref = 1; - /* hide our rings from netmap_mem_rings_delete */ - for (i = 0; i < na->num_tx_rings + 1; i++) { - na->tx_rings[i].ring = NULL; - } - for (i = 0; i < na->num_rx_rings + 1; i++) { - na->rx_rings[i].ring = NULL; - } - } - return 0; -} - -/* netmap_pipe_krings_delete. - * - * There are two cases: - * - * 1) state is - * - * usr1 --> e1 --> e2 - * - * and we are e1 (e2 is not registered, so krings_delete cannot be - * called on it); - * - * 2) state is - * - * usr1 --> e1 e2 <-- usr2 - * - * and we are either e1 or e2. - * - * In the former case we have to also delete the krings of e2; - * in the latter case we do nothing (note that our krings - * have already been hidden in the unregister callback). 
- */ -static void -netmap_pipe_krings_delete(struct netmap_adapter *na) -{ - struct netmap_pipe_adapter *pna = - (struct netmap_pipe_adapter *)na; - struct netmap_adapter *ona; /* na of the other end */ - int i; - - if (!pna->peer_ref) { - ND("%p: case 2, kept alive by peer", na); - return; - } - /* case 1) above */ - ND("%p: case 1, deleting everyhing", na); - netmap_krings_delete(na); /* also zeroes tx_rings etc. */ - /* restore the ring to be deleted on the peer */ - ona = &pna->peer->up; - if (ona->tx_rings == NULL) { - /* already deleted, we must be on an - * cleanup-after-error path */ - return; - } - for (i = 0; i < ona->num_tx_rings + 1; i++) - ona->tx_rings[i].ring = ona->tx_rings[i].save_ring; - for (i = 0; i < ona->num_rx_rings + 1; i++) - ona->rx_rings[i].ring = ona->rx_rings[i].save_ring; - netmap_mem_rings_delete(ona); - netmap_krings_delete(ona); -} - - -static void -netmap_pipe_dtor(struct netmap_adapter *na) -{ - struct netmap_pipe_adapter *pna = - (struct netmap_pipe_adapter *)na; - ND("%p", na); - if (pna->peer_ref) { - ND("%p: clean up peer", na); - pna->peer_ref = 0; - netmap_adapter_put(&pna->peer->up); - } - if (pna->role == NR_REG_PIPE_MASTER) - netmap_pipe_remove(pna->parent, pna); - netmap_adapter_put(pna->parent); - free(na->ifp, M_DEVBUF); - na->ifp = NULL; - pna->parent = NULL; -} - -int -netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create) -{ - struct nmreq pnmr; - struct netmap_adapter *pna; /* parent adapter */ - struct netmap_pipe_adapter *mna, *sna, *req; - struct ifnet *ifp, *ifp2; - u_int pipe_id; - int role = nmr->nr_flags & NR_REG_MASK; - int error; - - ND("flags %x", nmr->nr_flags); - - if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) { - ND("not a pipe"); - return 0; - } - role = nmr->nr_flags & NR_REG_MASK; - - /* first, try to find the parent adapter */ - bzero(&pnmr, sizeof(pnmr)); - memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ); - /* pass to parent the requested number of pipes */ - pnmr.nr_arg1 = nmr->nr_arg1; - error = netmap_get_na(&pnmr, &pna, create); - if (error) { - ND("parent lookup failed: %d", error); - return error; - } - ND("found parent: %s", NM_IFPNAME(pna->ifp)); - - if (NETMAP_OWNED_BY_KERN(pna)) { - ND("parent busy"); - error = EBUSY; - goto put_out; - } - - /* next, lookup the pipe id in the parent list */ - req = NULL; - pipe_id = nmr->nr_ringid & NETMAP_RING_MASK; - mna = netmap_pipe_find(pna, pipe_id); - if (mna) { - if (mna->role == role) { - ND("found %d directly at %d", pipe_id, mna->parent_slot); - req = mna; - } else { - ND("found %d indirectly at %d", pipe_id, mna->parent_slot); - req = mna->peer; - } - /* the pipe we have found already holds a ref to the parent, - * so we need to drop the one we got from netmap_get_na() - */ - netmap_adapter_put(pna); - goto found; - } - ND("pipe %d not found, create %d", pipe_id, create); - if (!create) { - error = ENODEV; - goto put_out; - } - /* we create both master and slave. - * The endpoint we were asked for holds a reference to - * the other one. 
- */ - ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO); - if (!ifp) { - error = ENOMEM; - goto put_out; - } - strcpy(ifp->if_xname, NM_IFPNAME(pna->ifp)); - - mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO); - if (mna == NULL) { - error = ENOMEM; - goto free_ifp; - } - mna->up.ifp = ifp; - - mna->id = pipe_id; - mna->role = NR_REG_PIPE_MASTER; - mna->parent = pna; - - mna->up.nm_txsync = netmap_pipe_txsync; - mna->up.nm_rxsync = netmap_pipe_rxsync; - mna->up.nm_register = netmap_pipe_reg; - mna->up.nm_dtor = netmap_pipe_dtor; - mna->up.nm_krings_create = netmap_pipe_krings_create; - mna->up.nm_krings_delete = netmap_pipe_krings_delete; - mna->up.nm_mem = pna->nm_mem; - mna->up.na_lut = pna->na_lut; - mna->up.na_lut_objtotal = pna->na_lut_objtotal; - - mna->up.num_tx_rings = 1; - mna->up.num_rx_rings = 1; - mna->up.num_tx_desc = nmr->nr_tx_slots; - nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc, - 1, NM_PIPE_MAXSLOTS, NULL); - mna->up.num_rx_desc = nmr->nr_rx_slots; - nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc, - 1, NM_PIPE_MAXSLOTS, NULL); - error = netmap_attach_common(&mna->up); - if (error) - goto free_mna; - /* register the master with the parent */ - error = netmap_pipe_add(pna, mna); - if (error) - goto free_mna; - - /* create the slave */ - ifp2 = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO); - if (!ifp) { - error = ENOMEM; - goto free_mna; - } - strcpy(ifp2->if_xname, NM_IFPNAME(pna->ifp)); - - sna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO); - if (sna == NULL) { - error = ENOMEM; - goto free_ifp2; - } - /* most fields are the same, copy from master and then fix */ - *sna = *mna; - sna->up.ifp = ifp2; - sna->role = NR_REG_PIPE_SLAVE; - error = netmap_attach_common(&sna->up); - if (error) - goto free_sna; - - /* join the two endpoints */ - mna->peer = sna; - sna->peer = mna; - - /* we already have a reference to the parent, but we - * need another one for the other endpoint we created - */ - netmap_adapter_get(pna); - - if (role == NR_REG_PIPE_MASTER) { - req = mna; - mna->peer_ref = 1; - netmap_adapter_get(&sna->up); - } else { - req = sna; - sna->peer_ref = 1; - netmap_adapter_get(&mna->up); - } - ND("created master %p and slave %p", mna, sna); -found: - - ND("pipe %d %s at %p", pipe_id, - (req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req); - *na = &req->up; - netmap_adapter_get(*na); - - /* write the configuration back */ - nmr->nr_tx_rings = req->up.num_tx_rings; - nmr->nr_rx_rings = req->up.num_rx_rings; - nmr->nr_tx_slots = req->up.num_tx_desc; - nmr->nr_rx_slots = req->up.num_rx_desc; - - /* keep the reference to the parent. - * It will be released by the req destructor - */ - - return 0; - -free_sna: - free(sna, M_DEVBUF); -free_ifp2: - free(ifp2, M_DEVBUF); -free_mna: - free(mna, M_DEVBUF); -free_ifp: - free(ifp, M_DEVBUF); -put_out: - netmap_adapter_put(pna); - return error; -} - - -#endif /* WITH_PIPES */ diff --git a/netmap/sys/dev/netmap/netmap_vale.c b/netmap/sys/dev/netmap/netmap_vale.c deleted file mode 100644 index 2cd84c6..0000000 --- a/netmap/sys/dev/netmap/netmap_vale.c +++ /dev/null @@ -1,2103 +0,0 @@ -/* - * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -/* - * This module implements the VALE switch for netmap - ---- VALE SWITCH --- - -NMG_LOCK() serializes all modifications to switches and ports. -A switch cannot be deleted until all ports are gone. - -For each switch, an SX lock (RWlock on linux) protects -deletion of ports. When configuring or deleting a new port, the -lock is acquired in exclusive mode (after holding NMG_LOCK). -When forwarding, the lock is acquired in shared mode (without NMG_LOCK). -The lock is held throughout the entire forwarding cycle, -during which the thread may incur in a page fault. -Hence it is important that sleepable shared locks are used. - -On the rx ring, the per-port lock is grabbed initially to reserve -a number of slot in the ring, then the lock is released, -packets are copied from source to destination, and then -the lock is acquired again and the receive ring is updated. -(A similar thing is done on the tx ring for NIC and host stack -ports attached to the switch) - - */ - -/* - * OS-specific code that is used only within this file. 
- * Other OS-specific code that must be accessed by drivers - * is present in netmap_kern.h - */ - -#if defined(__FreeBSD__) -#include /* prerequisite */ -__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257176 2013-10-26 17:58:36Z glebius $"); - -#include -#include -#include /* defines used in kernel.h */ -#include /* types used in module initialization */ -#include /* cdevsw struct, UID, GID */ -#include -#include /* struct socket */ -#include -#include -#include -#include /* sockaddrs */ -#include -#include -#include -#include -#include /* BIOCIMMEDIATE */ -#include /* bus_dmamap_* */ -#include -#include - - -#define BDG_RWLOCK_T struct rwlock // struct rwlock - -#define BDG_RWINIT(b) \ - rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) -#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) -#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) -#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) -#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) -#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) -#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) - - -#elif defined(linux) - -#include "bsd_glue.h" - -#elif defined(__APPLE__) - -#warning OSX support is only partial -#include "osx_glue.h" - -#else - -#error Unsupported platform - -#endif /* unsupported */ - -/* - * common headers - */ - -#include -#include -#include - -#ifdef WITH_VALE - -/* - * system parameters (most of them in netmap_kern.h) - * NM_NAME prefix for switch port names, default "vale" - * NM_BDG_MAXPORTS number of ports - * NM_BRIDGES max number of switches in the system. - * XXX should become a sysctl or tunable - * - * Switch ports are named valeX:Y where X is the switch name and Y - * is the port. If Y matches a physical interface name, the port is - * connected to a physical device. - * - * Unlike physical interfaces, switch ports use their own memory region - * for rings and buffers. - * The virtual interfaces use per-queue lock instead of core lock. - * In the tx loop, we aggregate traffic in batches to make all operations - * faster. The batch size is bridge_batch. - */ -#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */ -#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ -#define NM_BRIDGE_RINGSIZE 1024 /* in the device */ -#define NM_BDG_HASH 1024 /* forwarding table entries */ -#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ -#define NM_MULTISEG 64 /* max size of a chain of bufs */ -/* actual size of the tables */ -#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) -/* NM_FT_NULL terminates a list of slots in the ft */ -#define NM_FT_NULL NM_BDG_BATCH_MAX -#define NM_BRIDGES 8 /* number of bridges */ - - -/* - * bridge_batch is set via sysctl to the max batch size to be - * used in the bridge. The actual value may be larger as the - * last packet in the block may overflow the size. - */ -int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ -SYSCTL_DECL(_dev_netmap); -SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); - - -static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp); -static int bdg_netmap_reg(struct netmap_adapter *na, int onoff); -static int netmap_bwrap_attach(struct ifnet *, struct ifnet *); -static int netmap_bwrap_register(struct netmap_adapter *, int onoff); -int kern_netmap_regif(struct nmreq *nmr); - -/* - * For each output interface, nm_bdg_q is used to construct a list. - * bq_len is the number of output buffers (we can have coalescing - * during the copy). 
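The bq_head/bq_tail fields below, together with the ft_next links stored in the nm_bdg_fwd entries, thread the batch into one singly linked list of slot indices per destination, terminated by NM_FT_NULL. A minimal sketch of the append step (illustrative only; the real logic lives in nm_bdg_flush(), which is not part of this hunk):

    /* append forwarding-table entry 'i' to destination queue 'd' */
    static inline void
    bdg_q_append(struct nm_bdg_q *d, struct nm_bdg_fwd *ft, uint16_t i)
    {
        ft[i].ft_next = NM_FT_NULL;      /* new list tail */
        if (d->bq_head == NM_FT_NULL)
            d->bq_head = i;              /* first entry for this destination */
        else
            ft[d->bq_tail].ft_next = i;  /* link after the current tail */
        d->bq_tail = i;
        d->bq_len++;
    }
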
- */ -struct nm_bdg_q { - uint16_t bq_head; - uint16_t bq_tail; - uint32_t bq_len; /* number of buffers */ -}; - -/* XXX revise this */ -struct nm_hash_ent { - uint64_t mac; /* the top 2 bytes are the epoch */ - uint64_t ports; -}; - -/* - * nm_bridge is a descriptor for a VALE switch. - * Interfaces for a bridge are all in bdg_ports[]. - * The array has fixed size, an empty entry does not terminate - * the search, but lookups only occur on attach/detach so we - * don't mind if they are slow. - * - * The bridge is non blocking on the transmit ports: excess - * packets are dropped if there is no room on the output port. - * - * bdg_lock protects accesses to the bdg_ports array. - * This is a rw lock (or equivalent). - */ -struct nm_bridge { - /* XXX what is the proper alignment/layout ? */ - BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ - int bdg_namelen; - uint32_t bdg_active_ports; /* 0 means free */ - char bdg_basename[IFNAMSIZ]; - - /* Indexes of active ports (up to active_ports) - * and all other remaining ports. - */ - uint8_t bdg_port_index[NM_BDG_MAXPORTS]; - - struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; - - - /* - * The function to decide the destination port. - * It returns either of an index of the destination port, - * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to - * forward this packet. ring_nr is the source ring index, and the - * function may overwrite this value to forward this packet to a - * different ring index. - * This function must be set by netmap_bdgctl(). - */ - bdg_lookup_fn_t nm_bdg_lookup; - - /* the forwarding table, MAC+ports. - * XXX should be changed to an argument to be passed to - * the lookup function, and allocated on attach - */ - struct nm_hash_ent ht[NM_BDG_HASH]; -}; - - -/* - * XXX in principle nm_bridges could be created dynamically - * Right now we have a static array and deletions are protected - * by an exclusive lock. - */ -struct nm_bridge nm_bridges[NM_BRIDGES]; - - -/* - * this is a slightly optimized copy routine which rounds - * to multiple of 64 bytes and is often faster than dealing - * with other odd sizes. We assume there is enough room - * in the source and destination buffers. - * - * XXX only for multiples of 64 bytes, non overlapped. - */ -static inline void -pkt_copy(void *_src, void *_dst, int l) -{ - uint64_t *src = _src; - uint64_t *dst = _dst; - if (unlikely(l >= 1024)) { - memcpy(dst, src, l); - return; - } - for (; likely(l > 0); l-=64) { - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } -} - - -/* - * locate a bridge among the existing ones. - * MUST BE CALLED WITH NMG_LOCK() - * - * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. - * We assume that this is called with a name of at least NM_NAME chars. - */ -static struct nm_bridge * -nm_find_bridge(const char *name, int create) -{ - int i, l, namelen; - struct nm_bridge *b = NULL; - - NMG_LOCK_ASSERT(); - - namelen = strlen(NM_NAME); /* base length */ - l = name ? strlen(name) : 0; /* actual length */ - if (l < namelen) { - D("invalid bridge name %s", name ? 
name : NULL); - return NULL; - } - for (i = namelen + 1; i < l; i++) { - if (name[i] == ':') { - namelen = i; - break; - } - } - if (namelen >= IFNAMSIZ) - namelen = IFNAMSIZ; - ND("--- prefix is '%.*s' ---", namelen, name); - - /* lookup the name, remember empty slot if there is one */ - for (i = 0; i < NM_BRIDGES; i++) { - struct nm_bridge *x = nm_bridges + i; - - if (x->bdg_active_ports == 0) { - if (create && b == NULL) - b = x; /* record empty slot */ - } else if (x->bdg_namelen != namelen) { - continue; - } else if (strncmp(name, x->bdg_basename, namelen) == 0) { - ND("found '%.*s' at %d", namelen, name, i); - b = x; - break; - } - } - if (i == NM_BRIDGES && b) { /* name not found, can create entry */ - /* initialize the bridge */ - strncpy(b->bdg_basename, name, namelen); - ND("create new bridge %s with ports %d", b->bdg_basename, - b->bdg_active_ports); - b->bdg_namelen = namelen; - b->bdg_active_ports = 0; - for (i = 0; i < NM_BDG_MAXPORTS; i++) - b->bdg_port_index[i] = i; - /* set the default function */ - b->nm_bdg_lookup = netmap_bdg_learning; - /* reset the MAC address table */ - bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); - } - return b; -} - - -/* - * Free the forwarding tables for rings attached to switch ports. - */ -static void -nm_free_bdgfwd(struct netmap_adapter *na) -{ - int nrings, i; - struct netmap_kring *kring; - - NMG_LOCK_ASSERT(); - nrings = na->num_tx_rings; - kring = na->tx_rings; - for (i = 0; i < nrings; i++) { - if (kring[i].nkr_ft) { - free(kring[i].nkr_ft, M_DEVBUF); - kring[i].nkr_ft = NULL; /* protect from freeing twice */ - } - } -} - - -/* - * Allocate the forwarding tables for the rings attached to the bridge ports. - */ -static int -nm_alloc_bdgfwd(struct netmap_adapter *na) -{ - int nrings, l, i, num_dstq; - struct netmap_kring *kring; - - NMG_LOCK_ASSERT(); - /* all port:rings + broadcast */ - num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; - l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; - l += sizeof(struct nm_bdg_q) * num_dstq; - l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; - - nrings = netmap_real_tx_rings(na); - kring = na->tx_rings; - for (i = 0; i < nrings; i++) { - struct nm_bdg_fwd *ft; - struct nm_bdg_q *dstq; - int j; - - ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO); - if (!ft) { - nm_free_bdgfwd(na); - return ENOMEM; - } - dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); - for (j = 0; j < num_dstq; j++) { - dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; - dstq[j].bq_len = 0; - } - kring[i].nkr_ft = ft; - } - return 0; -} - - -static void -netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) -{ - int s_hw = hw, s_sw = sw; - int i, lim =b->bdg_active_ports; - uint8_t tmp[NM_BDG_MAXPORTS]; - - /* - New algorithm: - make a copy of bdg_port_index; - lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port - in the array of bdg_port_index, replacing them with - entries from the bottom of the array; - decrement bdg_active_ports; - acquire BDG_WLOCK() and copy back the array. - */ - - if (netmap_verbose) - D("detach %d and %d (lim %d)", hw, sw, lim); - /* make a copy of the list of active ports, update it, - * and then copy back within BDG_WLOCK(). 
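A concrete example of the swap-with-last update described above, with made-up indices. Suppose bdg_port_index starts as [0, 3, 5, 2] with bdg_active_ports = 4 and hw port 3 is being detached:

    /* before: tmp = [0, 3, 5, 2], lim = 4
     * port 3 is found at position 1; lim drops to 3 and the entry is
     * swapped with tmp[3]:
     *          tmp = [0, 2, 5, 3], lim = 3
     * the first lim entries remain the active set, the detached port is
     * parked past the end, and the array is copied back under BDG_WLOCK() */
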
- */ - memcpy(tmp, b->bdg_port_index, sizeof(tmp)); - for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { - if (hw >= 0 && tmp[i] == hw) { - ND("detach hw %d at %d", hw, i); - lim--; /* point to last active port */ - tmp[i] = tmp[lim]; /* swap with i */ - tmp[lim] = hw; /* now this is inactive */ - hw = -1; - } else if (sw >= 0 && tmp[i] == sw) { - ND("detach sw %d at %d", sw, i); - lim--; - tmp[i] = tmp[lim]; - tmp[lim] = sw; - sw = -1; - } else { - i++; - } - } - if (hw >= 0 || sw >= 0) { - D("XXX delete failed hw %d sw %d, should panic...", hw, sw); - } - - BDG_WLOCK(b); - b->bdg_ports[s_hw] = NULL; - if (s_sw >= 0) { - b->bdg_ports[s_sw] = NULL; - } - memcpy(b->bdg_port_index, tmp, sizeof(tmp)); - b->bdg_active_ports = lim; - BDG_WUNLOCK(b); - - ND("now %d active ports", lim); - if (lim == 0) { - ND("marking bridge %s as free", b->bdg_basename); - b->nm_bdg_lookup = NULL; - } -} - - -static void -netmap_adapter_vp_dtor(struct netmap_adapter *na) -{ - struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; - struct nm_bridge *b = vpna->na_bdg; - struct ifnet *ifp = na->ifp; - - ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount); - - if (b) { - netmap_bdg_detach_common(b, vpna->bdg_port, -1); - } - - bzero(ifp, sizeof(*ifp)); - free(ifp, M_DEVBUF); - na->ifp = NULL; -} - - -/* Try to get a reference to a netmap adapter attached to a VALE switch. - * If the adapter is found (or is created), this function returns 0, a - * non NULL pointer is returned into *na, and the caller holds a - * reference to the adapter. - * If an adapter is not found, then no reference is grabbed and the - * function returns an error code, or 0 if there is just a VALE prefix - * mismatch. Therefore the caller holds a reference when - * (*na != NULL && return == 0). - */ -int -netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) -{ - const char *name = nmr->nr_name; - struct ifnet *ifp; - int error = 0; - struct netmap_adapter *ret; - struct netmap_vp_adapter *vpna; - struct nm_bridge *b; - int i, j, cand = -1, cand2 = -1; - int needed; - - *na = NULL; /* default return value */ - - /* first try to see if this is a bridge port. */ - NMG_LOCK_ASSERT(); - if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) { - return 0; /* no error, but no VALE prefix */ - } - - b = nm_find_bridge(name, create); - if (b == NULL) { - D("no bridges available for '%s'", name); - return (create ? ENOMEM : ENXIO); - } - - /* Now we are sure that name starts with the bridge's name, - * lookup the port in the bridge. We need to scan the entire - * list. It is not important to hold a WLOCK on the bridge - * during the search because NMG_LOCK already guarantees - * that there are no other possible writers. - */ - - /* lookup in the local list of ports */ - for (j = 0; j < b->bdg_active_ports; j++) { - i = b->bdg_port_index[j]; - vpna = b->bdg_ports[i]; - // KASSERT(na != NULL); - ifp = vpna->up.ifp; - /* XXX make sure the name only contains one : */ - if (!strcmp(NM_IFPNAME(ifp), name)) { - netmap_adapter_get(&vpna->up); - ND("found existing if %s refs %d", name, - vpna->na_bdg_refcount); - *na = (struct netmap_adapter *)vpna; - return 0; - } - } - /* not found, should we create it? 
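On the userspace side, the create path below is reached simply by registering a name with the VALE prefix; when no NIC matches the part after the colon, a new virtual port is created. A hedged sketch (switch and port names are made up, fd is an open /dev/netmap descriptor, error handling omitted):

    struct nmreq req;

    memset(&req, 0, sizeof(req));
    req.nr_version = NETMAP_API;
    /* "vale0" names the switch, "vp0" the port; nr_cmd must stay 0
     * for a virtual port, as checked below */
    strncpy(req.nr_name, "vale0:vp0", sizeof(req.nr_name) - 1);
    ioctl(fd, NIOCREGIF, &req);
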
*/ - if (!create) - return ENXIO; - /* yes we should, see if we have space to attach entries */ - needed = 2; /* in some cases we only need 1 */ - if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { - D("bridge full %d, cannot create new port", b->bdg_active_ports); - return ENOMEM; - } - /* record the next two ports available, but do not allocate yet */ - cand = b->bdg_port_index[b->bdg_active_ports]; - cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; - ND("+++ bridge %s port %s used %d avail %d %d", - b->bdg_basename, name, b->bdg_active_ports, cand, cand2); - - /* - * try see if there is a matching NIC with this name - * (after the bridge's name) - */ - ifp = ifunit_ref(name + b->bdg_namelen + 1); - if (!ifp) { /* this is a virtual port */ - if (nmr->nr_cmd) { - /* nr_cmd must be 0 for a virtual port */ - return EINVAL; - } - - /* create a struct ifnet for the new port. - * need M_NOWAIT as we are under nma_lock - */ - ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO); - if (!ifp) - return ENOMEM; - - strcpy(ifp->if_xname, name); - /* bdg_netmap_attach creates a struct netmap_adapter */ - error = bdg_netmap_attach(nmr, ifp); - if (error) { - D("error %d", error); - free(ifp, M_DEVBUF); - return error; - } - ret = NA(ifp); - cand2 = -1; /* only need one port */ - } else { /* this is a NIC */ - struct ifnet *fake_ifp; - - error = netmap_get_hw_na(ifp, &ret); - if (error || ret == NULL) - goto out; - - /* make sure the NIC is not already in use */ - if (NETMAP_OWNED_BY_ANY(ret)) { - D("NIC %s busy, cannot attach to bridge", - NM_IFPNAME(ifp)); - error = EBUSY; - goto out; - } - /* create a fake interface */ - fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO); - if (!fake_ifp) { - error = ENOMEM; - goto out; - } - strcpy(fake_ifp->if_xname, name); - error = netmap_bwrap_attach(fake_ifp, ifp); - if (error) { - free(fake_ifp, M_DEVBUF); - goto out; - } - ret = NA(fake_ifp); - if (nmr->nr_arg1 != NETMAP_BDG_HOST) - cand2 = -1; /* only need one port */ - if_rele(ifp); - } - vpna = (struct netmap_vp_adapter *)ret; - - BDG_WLOCK(b); - vpna->bdg_port = cand; - ND("NIC %p to bridge port %d", vpna, cand); - /* bind the port to the bridge (virtual ports are not active) */ - b->bdg_ports[cand] = vpna; - vpna->na_bdg = b; - b->bdg_active_ports++; - if (cand2 >= 0) { - struct netmap_vp_adapter *hostna = vpna + 1; - /* also bind the host stack to the bridge */ - b->bdg_ports[cand2] = hostna; - hostna->bdg_port = cand2; - hostna->na_bdg = b; - b->bdg_active_ports++; - ND("host %p to bridge port %d", hostna, cand2); - } - ND("if %s refs %d", name, vpna->up.na_refcount); - BDG_WUNLOCK(b); - *na = ret; - netmap_adapter_get(ret); - return 0; - -out: - if_rele(ifp); - - return error; -} - - -/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */ -static int -nm_bdg_attach(struct nmreq *nmr) -{ - struct netmap_adapter *na; - struct netmap_if *nifp; - struct netmap_priv_d *npriv; - struct netmap_bwrap_adapter *bna; - int error; - - npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); - if (npriv == NULL) - return ENOMEM; - - NMG_LOCK(); - - error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */); - if (error) /* no device, or another bridge or user owns the device */ - goto unlock_exit; - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } - - if (na->active_fds > 0) { /* already registered */ - error = EBUSY; - goto unref_exit; - } - - nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error); - if (!nifp) { - goto 
unref_exit; - } - - bna = (struct netmap_bwrap_adapter*)na; - bna->na_kpriv = npriv; - NMG_UNLOCK(); - ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp)); - return 0; - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - bzero(npriv, sizeof(*npriv)); - free(npriv, M_DEVBUF); - return error; -} - - -static int -nm_bdg_detach(struct nmreq *nmr) -{ - struct netmap_adapter *na; - int error; - struct netmap_bwrap_adapter *bna; - int last_instance; - - NMG_LOCK(); - error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */); - if (error) { /* no device, or another bridge or user owns the device */ - goto unlock_exit; - } - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } - - bna = (struct netmap_bwrap_adapter *)na; - - if (na->active_fds == 0) { /* not registered */ - error = EINVAL; - goto unref_exit; - } - - last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */ - if (!last_instance) { - D("--- error, trying to detach an entry with active mmaps"); - error = EINVAL; - } else { - struct netmap_priv_d *npriv = bna->na_kpriv; - - bna->na_kpriv = NULL; - D("deleting priv"); - - bzero(npriv, sizeof(*npriv)); - free(npriv, M_DEVBUF); - } - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - return error; - -} - - -/* exported to kernel callers, e.g. OVS ? - * Entry point. - * Called without NMG_LOCK. - */ -int -netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func) -{ - struct nm_bridge *b; - struct netmap_adapter *na; - struct netmap_vp_adapter *vpna; - struct ifnet *iter; - char *name = nmr->nr_name; - int cmd = nmr->nr_cmd, namelen = strlen(name); - int error = 0, i, j; - - switch (cmd) { - case NETMAP_BDG_ATTACH: - error = nm_bdg_attach(nmr); - break; - - case NETMAP_BDG_DETACH: - error = nm_bdg_detach(nmr); - break; - - case NETMAP_BDG_LIST: - /* this is used to enumerate bridges and ports */ - if (namelen) { /* look up indexes of bridge and port */ - if (strncmp(name, NM_NAME, strlen(NM_NAME))) { - error = EINVAL; - break; - } - NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */); - if (!b) { - error = ENOENT; - NMG_UNLOCK(); - break; - } - - error = ENOENT; - for (j = 0; j < b->bdg_active_ports; j++) { - i = b->bdg_port_index[j]; - vpna = b->bdg_ports[i]; - if (vpna == NULL) { - D("---AAAAAAAAARGH-------"); - continue; - } - iter = vpna->up.ifp; - /* the former and the latter identify a - * virtual port and a NIC, respectively - */ - if (!strcmp(iter->if_xname, name)) { - /* bridge index */ - nmr->nr_arg1 = b - nm_bridges; - nmr->nr_arg2 = i; /* port index */ - error = 0; - break; - } - } - NMG_UNLOCK(); - } else { - /* return the first non-empty entry starting from - * bridge nr_arg1 and port nr_arg2. - * - * Users can detect the end of the same bridge by - * seeing the new and old value of nr_arg1, and can - * detect the end of all the bridge by error != 0 - */ - i = nmr->nr_arg1; - j = nmr->nr_arg2; - - NMG_LOCK(); - for (error = ENOENT; i < NM_BRIDGES; i++) { - b = nm_bridges + i; - if (j >= b->bdg_active_ports) { - j = 0; /* following bridges scan from 0 */ - continue; - } - nmr->nr_arg1 = i; - nmr->nr_arg2 = j; - j = b->bdg_port_index[j]; - vpna = b->bdg_ports[j]; - iter = vpna->up.ifp; - strncpy(name, iter->if_xname, (size_t)IFNAMSIZ); - error = 0; - break; - } - NMG_UNLOCK(); - } - break; - - case NETMAP_BDG_LOOKUP_REG: - /* register a lookup function to the given bridge. - * nmr->nr_name may be just bridge's name (including ':' - * if it is not just NM_NAME). 
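Referring to the NETMAP_BDG_LIST case above: the nr_arg1/nr_arg2 cursor convention lends itself to a simple enumeration loop. A sketch, assuming a hypothetical bdg_list() wrapper that issues the control ioctl with nr_cmd = NETMAP_BDG_LIST and returns its error code:

    struct nmreq req;

    memset(&req, 0, sizeof(req));
    req.nr_version = NETMAP_API;
    req.nr_cmd  = NETMAP_BDG_LIST;
    req.nr_arg1 = req.nr_arg2 = 0;          /* start from the first bridge/port */
    while (bdg_list(&req) == 0) {           /* hypothetical ioctl wrapper */
        printf("bridge %u port %u: %s\n", req.nr_arg1, req.nr_arg2, req.nr_name);
        req.nr_arg2++;                      /* ask for the next port; the kernel
                                             * rolls over to the next bridge and
                                             * reports the overall end with an
                                             * error */
    }
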
- */ - if (!func) { - error = EINVAL; - break; - } - NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */); - if (!b) { - error = EINVAL; - } else { - b->nm_bdg_lookup = func; - } - NMG_UNLOCK(); - break; - - case NETMAP_BDG_VNET_HDR: - /* Valid lengths for the virtio-net header are 0 (no header), - 10 and 12. */ - if (nmr->nr_arg1 != 0 && - nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && - nmr->nr_arg1 != 12) { - error = EINVAL; - break; - } - NMG_LOCK(); - error = netmap_get_bdg_na(nmr, &na, 0); - if (na && !error) { - vpna = (struct netmap_vp_adapter *)na; - vpna->virt_hdr_len = nmr->nr_arg1; - if (vpna->virt_hdr_len) - vpna->mfs = NETMAP_BDG_BUF_SIZE(na->nm_mem); - D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna); - netmap_adapter_put(na); - } - NMG_UNLOCK(); - break; - - default: - D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); - error = EINVAL; - break; - } - return error; -} - -static int -netmap_vp_krings_create(struct netmap_adapter *na) -{ - u_int tailroom; - int error, i; - uint32_t *leases; - u_int nrx = netmap_real_rx_rings(na); - - /* - * Leases are attached to RX rings on vale ports - */ - tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx; - - error = netmap_krings_create(na, tailroom); - if (error) - return error; - - leases = na->tailroom; - - for (i = 0; i < nrx; i++) { /* Receive rings */ - na->rx_rings[i].nkr_leases = leases; - leases += na->num_rx_desc; - } - - error = nm_alloc_bdgfwd(na); - if (error) { - netmap_krings_delete(na); - return error; - } - - return 0; -} - - -static void -netmap_vp_krings_delete(struct netmap_adapter *na) -{ - nm_free_bdgfwd(na); - netmap_krings_delete(na); -} - - -static int -nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, - struct netmap_vp_adapter *na, u_int ring_nr); - - -/* - * Grab packets from a kring, move them into the ft structure - * associated to the tx (input) port. Max one instance per port, - * filtered on input (ioctl, poll or XXX). - * Returns the next position in the ring. - */ -static int -nm_bdg_preflush(struct netmap_kring *kring, u_int end) -{ - struct netmap_vp_adapter *na = - (struct netmap_vp_adapter*)kring->na; - struct netmap_ring *ring = kring->ring; - struct nm_bdg_fwd *ft; - u_int ring_nr = kring->ring_id; - u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1; - u_int ft_i = 0; /* start from 0 */ - u_int frags = 1; /* how many frags ? */ - struct nm_bridge *b = na->na_bdg; - - /* To protect against modifications to the bridge we acquire a - * shared lock, waiting if we can sleep (if the source port is - * attached to a user process) or with a trylock otherwise (NICs). - */ - ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j); - if (na->up.na_flags & NAF_BDG_MAYSLEEP) - BDG_RLOCK(b); - else if (!BDG_RTRYLOCK(b)) - return 0; - ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j); - ft = kring->nkr_ft; - - for (; likely(j != end); j = nm_next(j, lim)) { - struct netmap_slot *slot = &ring->slot[j]; - char *buf; - - ft[ft_i].ft_len = slot->len; - ft[ft_i].ft_flags = slot->flags; - - ND("flags is 0x%x", slot->flags); - /* this slot goes into a list so initialize the link field */ - ft[ft_i].ft_next = NM_FT_NULL; - buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 
- (void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot); - __builtin_prefetch(buf); - ++ft_i; - if (slot->flags & NS_MOREFRAG) { - frags++; - continue; - } - if (unlikely(netmap_verbose && frags > 1)) - RD(5, "%d frags at %d", frags, ft_i - frags); - ft[ft_i - frags].ft_frags = frags; - frags = 1; - if (unlikely((int)ft_i >= bridge_batch)) - ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); - } - if (frags > 1) { - D("truncate incomplete fragment at %d (%d frags)", ft_i, frags); - // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG - ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG; - ft[ft_i - frags].ft_frags = frags - 1; - } - if (ft_i) - ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); - BDG_RUNLOCK(b); - return j; -} - - -/* ----- FreeBSD if_bridge hash function ------- */ - -/* - * The following hash function is adapted from "Hash Functions" by Bob Jenkins - * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). - * - * http://www.burtleburtle.net/bob/hash/spooky.html - */ -#define mix(a, b, c) \ -do { \ - a -= b; a -= c; a ^= (c >> 13); \ - b -= c; b -= a; b ^= (a << 8); \ - c -= a; c -= b; c ^= (b >> 13); \ - a -= b; a -= c; a ^= (c >> 12); \ - b -= c; b -= a; b ^= (a << 16); \ - c -= a; c -= b; c ^= (b >> 5); \ - a -= b; a -= c; a ^= (c >> 3); \ - b -= c; b -= a; b ^= (a << 10); \ - c -= a; c -= b; c ^= (b >> 15); \ -} while (/*CONSTCOND*/0) - - -static __inline uint32_t -nm_bridge_rthash(const uint8_t *addr) -{ - uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key - - b += addr[5] << 8; - b += addr[4]; - a += addr[3] << 24; - a += addr[2] << 16; - a += addr[1] << 8; - a += addr[0]; - - mix(a, b, c); -#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1) - return (c & BRIDGE_RTHASH_MASK); -} - -#undef mix - - -static int -bdg_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct netmap_vp_adapter *vpna = - (struct netmap_vp_adapter*)na; - struct ifnet *ifp = na->ifp; - - /* the interface is already attached to the bridge, - * so we only need to toggle IFCAP_NETMAP. - */ - BDG_WLOCK(vpna->na_bdg); - if (onoff) { - ifp->if_capenable |= IFCAP_NETMAP; - } else { - ifp->if_capenable &= ~IFCAP_NETMAP; - } - BDG_WUNLOCK(vpna->na_bdg); - return 0; -} - - -/* - * Lookup function for a learning bridge. - * Update the hash table with the source address, - * and then returns the destination port index, and the - * ring in *dst_ring (at the moment, always use ring 0) - */ -u_int -netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring, - struct netmap_vp_adapter *na) -{ - struct nm_hash_ent *ht = na->na_bdg->ht; - uint32_t sh, dh; - u_int dst, mysrc = na->bdg_port; - uint64_t smac, dmac; - - if (buf_len < 14) { - D("invalid buf length %d", buf_len); - return NM_BDG_NOPORT; - } - dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; - smac = le64toh(*(uint64_t *)(buf + 4)); - smac >>= 16; - - /* - * The hash is somewhat expensive, there might be some - * worthwhile optimizations here. - */ - if ((buf[6] & 1) == 0) { /* valid src */ - uint8_t *s = buf+6; - sh = nm_bridge_rthash(s); // XXX hash of source - /* update source port forwarding entry */ - ht[sh].mac = smac; /* XXX expire ? */ - ht[sh].ports = mysrc; - if (netmap_verbose) - D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", - s[0], s[1], s[2], s[3], s[4], s[5], mysrc); - } - dst = NM_BDG_BROADCAST; - if ((buf[0] & 1) == 0) { /* unicast */ - dh = nm_bridge_rthash(buf); // XXX hash of dst - if (ht[dh].mac == dmac) { /* found dst */ - dst = ht[dh].ports; - } - /* XXX otherwise return NM_BDG_UNKNOWN ? 
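	 * (as the code stands, an unknown unicast destination simply keeps
	 * the NM_BDG_BROADCAST value assigned above, so the frame is flooded)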
*/ - } - *dst_ring = 0; - return dst; -} - - -/* - * Available space in the ring. Only used in VALE code - * and only with is_rx = 1 - */ -static inline uint32_t -nm_kr_space(struct netmap_kring *k, int is_rx) -{ - int space; - - if (is_rx) { - int busy = k->nkr_hwlease - k->nr_hwcur; - if (busy < 0) - busy += k->nkr_num_slots; - space = k->nkr_num_slots - 1 - busy; - } else { - /* XXX never used in this branch */ - space = k->nr_hwtail - k->nkr_hwlease; - if (space < 0) - space += k->nkr_num_slots; - } -#if 0 - // sanity check - if (k->nkr_hwlease >= k->nkr_num_slots || - k->nr_hwcur >= k->nkr_num_slots || - k->nr_tail >= k->nkr_num_slots || - busy < 0 || - busy >= k->nkr_num_slots) { - D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, - k->nkr_lease_idx, k->nkr_num_slots); - } -#endif - return space; -} - - - - -/* make a lease on the kring for N positions. return the - * lease index - * XXX only used in VALE code and with is_rx = 1 - */ -static inline uint32_t -nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx) -{ - uint32_t lim = k->nkr_num_slots - 1; - uint32_t lease_idx = k->nkr_lease_idx; - - k->nkr_leases[lease_idx] = NR_NOSLOT; - k->nkr_lease_idx = nm_next(lease_idx, lim); - - if (n > nm_kr_space(k, is_rx)) { - D("invalid request for %d slots", n); - panic("x"); - } - /* XXX verify that there are n slots */ - k->nkr_hwlease += n; - if (k->nkr_hwlease > lim) - k->nkr_hwlease -= lim + 1; - - if (k->nkr_hwlease >= k->nkr_num_slots || - k->nr_hwcur >= k->nkr_num_slots || - k->nr_hwtail >= k->nkr_num_slots || - k->nkr_lease_idx >= k->nkr_num_slots) { - D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", - k->na->ifp->if_xname, - k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, - k->nkr_lease_idx, k->nkr_num_slots); - } - return lease_idx; -} - -/* - * This flush routine supports only unicast and broadcast but a large - * number of ports, and lets us replace the learn and dispatch functions. - */ -int -nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, - u_int ring_nr) -{ - struct nm_bdg_q *dst_ents, *brddst; - uint16_t num_dsts = 0, *dsts; - struct nm_bridge *b = na->na_bdg; - u_int i, j, me = na->bdg_port; - - /* - * The work area (pointed by ft) is followed by an array of - * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS - * queues per port plus one for the broadcast traffic. - * Then we have an array of destination indexes. - */ - dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); - dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); - - /* first pass: find a destination for each packet in the batch */ - for (i = 0; likely(i < n); i += ft[i].ft_frags) { - uint8_t dst_ring = ring_nr; /* default, same ring as origin */ - uint16_t dst_port, d_i; - struct nm_bdg_q *d; - uint8_t *buf = ft[i].ft_buf; - u_int len = ft[i].ft_len; - - ND("slot %d frags %d", i, ft[i].ft_frags); - /* Drop the packet if the virtio-net header is not into the first - fragment nor at the very beginning of the second. 
*/ - if (unlikely(na->virt_hdr_len > len)) - continue; - if (len == na->virt_hdr_len) { - buf = ft[i+1].ft_buf; - len = ft[i+1].ft_len; - } else { - buf += na->virt_hdr_len; - len -= na->virt_hdr_len; - } - dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na); - if (netmap_verbose > 255) - RD(5, "slot %d port %d -> %d", i, me, dst_port); - if (dst_port == NM_BDG_NOPORT) - continue; /* this packet is identified to be dropped */ - else if (unlikely(dst_port > NM_BDG_MAXPORTS)) - continue; - else if (dst_port == NM_BDG_BROADCAST) - dst_ring = 0; /* broadcasts always go to ring 0 */ - else if (unlikely(dst_port == me || - !b->bdg_ports[dst_port])) - continue; - - /* get a position in the scratch pad */ - d_i = dst_port * NM_BDG_MAXRINGS + dst_ring; - d = dst_ents + d_i; - - /* append the first fragment to the list */ - if (d->bq_head == NM_FT_NULL) { /* new destination */ - d->bq_head = d->bq_tail = i; - /* remember this position to be scanned later */ - if (dst_port != NM_BDG_BROADCAST) - dsts[num_dsts++] = d_i; - } else { - ft[d->bq_tail].ft_next = i; - d->bq_tail = i; - } - d->bq_len += ft[i].ft_frags; - } - - /* - * Broadcast traffic goes to ring 0 on all destinations. - * So we need to add these rings to the list of ports to scan. - * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is - * expensive. We should keep a compact list of active destinations - * so we could shorten this loop. - */ - brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; - if (brddst->bq_head != NM_FT_NULL) { - for (j = 0; likely(j < b->bdg_active_ports); j++) { - uint16_t d_i; - i = b->bdg_port_index[j]; - if (unlikely(i == me)) - continue; - d_i = i * NM_BDG_MAXRINGS; - if (dst_ents[d_i].bq_head == NM_FT_NULL) - dsts[num_dsts++] = d_i; - } - } - - ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts); - /* second pass: scan destinations (XXX will be modular somehow) */ - for (i = 0; i < num_dsts; i++) { - struct ifnet *dst_ifp; - struct netmap_vp_adapter *dst_na; - struct netmap_kring *kring; - struct netmap_ring *ring; - u_int dst_nr, lim, j, d_i, next, brd_next; - u_int needed, howmany; - int retry = netmap_txsync_retry; - struct nm_bdg_q *d; - uint32_t my_start = 0, lease_idx = 0; - int nrings; - int virt_hdr_mismatch = 0; - - d_i = dsts[i]; - ND("second pass %d port %d", i, d_i); - d = dst_ents + d_i; - // XXX fix the division - dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS]; - /* protect from the lookup function returning an inactive - * destination port - */ - if (unlikely(dst_na == NULL)) - goto cleanup; - if (dst_na->up.na_flags & NAF_SW_ONLY) - goto cleanup; - dst_ifp = dst_na->up.ifp; - /* - * The interface may be in !netmap mode in two cases: - * - when na is attached but not activated yet; - * - when na is being deactivated but is still attached. - */ - if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) { - ND("not in netmap mode!"); - goto cleanup; - } - - /* there is at least one either unicast or broadcast packet */ - brd_next = brddst->bq_head; - next = d->bq_head; - /* we need to reserve this many slots. If fewer are - * available, some packets will be dropped. - * Packets may have multiple fragments, so we may not use - * there is a chance that we may not use all of the slots - * we have claimed, so we will need to handle the leftover - * ones when we regain the lock. - */ - needed = d->bq_len + brddst->bq_len; - - if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) { - /* There is a virtio-net header/offloadings mismatch between - * source and destination. 
The slower mismatch datapath will - * be used to cope with all the mismatches. - */ - virt_hdr_mismatch = 1; - if (dst_na->mfs < na->mfs) { - /* We may need to do segmentation offloadings, and so - * we may need a number of destination slots greater - * than the number of input slots ('needed'). - * We look for the smallest integer 'x' which satisfies: - * needed * na->mfs + x * H <= x * na->mfs - * where 'H' is the length of the longest header that may - * be replicated in the segmentation process (e.g. for - * TCPv4 we must account for ethernet header, IP header - * and TCPv4 header). - */ - needed = (needed * na->mfs) / - (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; - ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); - } - } - - ND(5, "pass 2 dst %d is %x %s", - i, d_i, is_vp ? "virtual" : "nic/host"); - dst_nr = d_i & (NM_BDG_MAXRINGS-1); - nrings = dst_na->up.num_rx_rings; - if (dst_nr >= nrings) - dst_nr = dst_nr % nrings; - kring = &dst_na->up.rx_rings[dst_nr]; - ring = kring->ring; - lim = kring->nkr_num_slots - 1; - -retry: - - if (dst_na->retry && retry) { - /* try to get some free slot from the previous run */ - dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); - } - /* reserve the buffers in the queue and an entry - * to report completion, and drop lock. - * XXX this might become a helper function. - */ - mtx_lock(&kring->q_lock); - if (kring->nkr_stopped) { - mtx_unlock(&kring->q_lock); - goto cleanup; - } - my_start = j = kring->nkr_hwlease; - howmany = nm_kr_space(kring, 1); - if (needed < howmany) - howmany = needed; - lease_idx = nm_kr_lease(kring, howmany, 1); - mtx_unlock(&kring->q_lock); - - /* only retry if we need more than available slots */ - if (retry && needed <= howmany) - retry = 0; - - /* copy to the destination queue */ - while (howmany > 0) { - struct netmap_slot *slot; - struct nm_bdg_fwd *ft_p, *ft_end; - u_int cnt; - - /* find the queue from which we pick next packet. - * NM_FT_NULL is always higher than valid indexes - * so we never dereference it if the other list - * has packets (and if both are empty we never - * get here). - */ - if (next < brd_next) { - ft_p = ft + next; - next = ft_p->ft_next; - } else { /* insert broadcast */ - ft_p = ft + brd_next; - brd_next = ft_p->ft_next; - } - cnt = ft_p->ft_frags; // cnt > 0 - if (unlikely(cnt > howmany)) - break; /* no more space */ - if (netmap_verbose && cnt > 1) - RD(5, "rx %d frags to %d", cnt, j); - ft_end = ft_p + cnt; - if (unlikely(virt_hdr_mismatch)) { - bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany); - } else { - howmany -= cnt; - do { - char *dst, *src = ft_p->ft_buf; - size_t copy_len = ft_p->ft_len, dst_len = copy_len; - - slot = &ring->slot[j]; - dst = BDG_NMB(&dst_na->up, slot); - - ND("send [%d] %d(%d) bytes at %s:%d", - i, (int)copy_len, (int)dst_len, - NM_IFPNAME(dst_ifp), j); - /* round to a multiple of 64 */ - copy_len = (copy_len + 63) & ~63; - - if (ft_p->ft_flags & NS_INDIRECT) { - if (copyin(src, dst, copy_len)) { - // invalid user pointer, pretend len is 0 - dst_len = 0; - } - } else { - //memcpy(dst, src, copy_len); - pkt_copy(src, dst, (int)copy_len); - } - slot->len = dst_len; - slot->flags = (cnt << 8)| NS_MOREFRAG; - j = nm_next(j, lim); - needed--; - ft_p++; - } while (ft_p != ft_end); - slot->flags = (cnt << 8); /* clear flag on last entry */ - } - /* are we done ? 
*/ - if (next == NM_FT_NULL && brd_next == NM_FT_NULL) - break; - } - { - /* current position */ - uint32_t *p = kring->nkr_leases; /* shorthand */ - uint32_t update_pos; - int still_locked = 1; - - mtx_lock(&kring->q_lock); - if (unlikely(howmany > 0)) { - /* not used all bufs. If i am the last one - * i can recover the slots, otherwise must - * fill them with 0 to mark empty packets. - */ - ND("leftover %d bufs", howmany); - if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) { - /* yes i am the last one */ - ND("roll back nkr_hwlease to %d", j); - kring->nkr_hwlease = j; - } else { - while (howmany-- > 0) { - ring->slot[j].len = 0; - ring->slot[j].flags = 0; - j = nm_next(j, lim); - } - } - } - p[lease_idx] = j; /* report I am done */ - - update_pos = kring->nr_hwtail; - - if (my_start == update_pos) { - /* all slots before my_start have been reported, - * so scan subsequent leases to see if other ranges - * have been completed, and to a selwakeup or txsync. - */ - while (lease_idx != kring->nkr_lease_idx && - p[lease_idx] != NR_NOSLOT) { - j = p[lease_idx]; - p[lease_idx] = NR_NOSLOT; - lease_idx = nm_next(lease_idx, lim); - } - /* j is the new 'write' position. j != my_start - * means there are new buffers to report - */ - if (likely(j != my_start)) { - kring->nr_hwtail = j; - still_locked = 0; - mtx_unlock(&kring->q_lock); - dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); - if (dst_na->retry && retry--) - goto retry; - } - } - if (still_locked) - mtx_unlock(&kring->q_lock); - } -cleanup: - d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */ - d->bq_len = 0; - } - brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */ - brddst->bq_len = 0; - return 0; -} - -/* - * main dispatch routine for the bridge. - * We already know that only one thread is running this. - * we must run nm_bdg_preflush without lock. - */ -static int -netmap_vp_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_vp_adapter *na = - (struct netmap_vp_adapter *)kring->na; - u_int done; - u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = kring->rcur; - - if (bridge_batch <= 0) { /* testing only */ - done = cur; // used all - goto done; - } - if (bridge_batch > NM_BDG_BATCH) - bridge_batch = NM_BDG_BATCH; - - done = nm_bdg_preflush(kring, cur); -done: - if (done != cur) - D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail); - /* - * packets between 'done' and 'cur' are left unsent. - */ - kring->nr_hwcur = done; - kring->nr_hwtail = nm_prev(done, lim); - nm_txsync_finalize(kring); - if (netmap_verbose) - D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), kring->ring_id, flags); - return 0; -} - - -static int -netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct netmap_ring *ring = kring->ring; - u_int nm_i, lim = kring->nkr_num_slots - 1; - u_int head = nm_rxsync_prologue(kring); - int n; - - if (head > lim) { - D("ouch dangerous reset!!!"); - n = netmap_ring_reinit(kring); - goto done; - } - - /* First part, import newly received packets. */ - /* actually nothing to do here, they are already in the kring */ - - /* Second part, skip past packets that userspace has released. 
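	 * (i.e. advance nr_hwcur up to the head published by the user: the
	 * slots in [nr_hwcur .. head-1] have been consumed and can be reused)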
*/ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - /* consistency check, but nothing really important here */ - for (n = 0; likely(nm_i != head); n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - void *addr = BDG_NMB(na, slot); - - if (addr == netmap_buffer_base) { /* bad buf */ - D("bad buffer index %d, ignore ?", - slot->buf_idx); - } - slot->flags &= ~NS_BUF_CHANGED; - nm_i = nm_next(nm_i, lim); - } - kring->nr_hwcur = head; - } - - /* tell userspace that there are new packets */ - nm_rxsync_finalize(kring); - n = 0; -done: - return n; -} - -/* - * user process reading from a VALE switch. - * Already protected against concurrent calls from userspace, - * but we must acquire the queue's lock to protect against - * writers on the same queue. - */ -static int -netmap_vp_rxsync(struct netmap_kring *kring, int flags) -{ - int n; - - mtx_lock(&kring->q_lock); - n = netmap_vp_rxsync_locked(kring, flags); - mtx_unlock(&kring->q_lock); - return n; -} - - -static int -bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp) -{ - struct netmap_vp_adapter *vpna; - struct netmap_adapter *na; - int error; - u_int npipes = 0; - - vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO); - if (vpna == NULL) - return ENOMEM; - - na = &vpna->up; - - na->ifp = ifp; - - /* bound checking */ - na->num_tx_rings = nmr->nr_tx_rings; - nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); - nmr->nr_tx_rings = na->num_tx_rings; // write back - na->num_rx_rings = nmr->nr_rx_rings; - nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); - nmr->nr_rx_rings = na->num_rx_rings; // write back - nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, - 1, NM_BDG_MAXSLOTS, NULL); - na->num_tx_desc = nmr->nr_tx_slots; - nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, - 1, NM_BDG_MAXSLOTS, NULL); - /* validate number of pipes. We want at least 1, - * but probably can do with some more. - * So let's use 2 as default (when 0 is supplied) - */ - npipes = nmr->nr_arg1; - nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); - nmr->nr_arg1 = npipes; /* write back */ - /* validate extra bufs */ - nm_bound_var(&nmr->nr_arg3, 0, 0, - 128*NM_BDG_MAXSLOTS, NULL); - na->num_rx_desc = nmr->nr_rx_slots; - vpna->virt_hdr_len = 0; - vpna->mfs = 1514; - /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? - vpna->mfs = netmap_buf_size; */ - if (netmap_verbose) - D("max frame size %u", vpna->mfs); - - na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER; - na->nm_txsync = netmap_vp_txsync; - na->nm_rxsync = netmap_vp_rxsync; - na->nm_register = bdg_netmap_reg; - na->nm_dtor = netmap_adapter_vp_dtor; - na->nm_krings_create = netmap_vp_krings_create; - na->nm_krings_delete = netmap_vp_krings_delete; - na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp), - na->num_tx_rings, na->num_tx_desc, - na->num_rx_rings, na->num_rx_desc, - nmr->nr_arg3, npipes, &error); - if (na->nm_mem == NULL) - goto err; - /* other nmd fields are set in the common routine */ - error = netmap_attach_common(na); - if (error) - goto err; - return 0; - -err: - if (na->nm_mem != NULL) - netmap_mem_private_delete(na->nm_mem); - free(vpna, M_DEVBUF); - return error; -} - - -static void -netmap_bwrap_dtor(struct netmap_adapter *na) -{ - struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; - struct netmap_adapter *hwna = bna->hwna; - struct nm_bridge *b = bna->up.na_bdg, - *bh = bna->host.na_bdg; - struct ifnet *ifp = na->ifp; - - ND("na %p", na); - - if (b) { - netmap_bdg_detach_common(b, bna->up.bdg_port, - (bh ? 
bna->host.bdg_port : -1)); - } - - hwna->na_private = NULL; - netmap_adapter_put(hwna); - - bzero(ifp, sizeof(*ifp)); - free(ifp, M_DEVBUF); - na->ifp = NULL; - -} - - -/* - * Intr callback for NICs connected to a bridge. - * Simply ignore tx interrupts (maybe we could try to recover space ?) - * and pass received packets from nic to the bridge. - * - * XXX TODO check locking: this is called from the interrupt - * handler so we should make sure that the interface is not - * disconnected while passing down an interrupt. - * - * Note, no user process can access this NIC or the host stack. - * The only part of the ring that is significant are the slots, - * and head/cur/tail are set from the kring as needed - * (part as a receive ring, part as a transmit ring). - * - * callback that overwrites the hwna notify callback. - * Packets come from the outside or from the host stack and are put on an hwna rx ring. - * The bridge wrapper then sends the packets through the bridge. - */ -static int -netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags) -{ - struct ifnet *ifp = na->ifp; - struct netmap_bwrap_adapter *bna = na->na_private; - struct netmap_vp_adapter *hostna = &bna->host; - struct netmap_kring *kring, *bkring; - struct netmap_ring *ring; - int is_host_ring = ring_nr == na->num_rx_rings; - struct netmap_vp_adapter *vpna = &bna->up; - int error = 0; - - if (netmap_verbose) - D("%s %s%d 0x%x", NM_IFPNAME(ifp), - (tx == NR_TX ? "TX" : "RX"), ring_nr, flags); - - if (flags & NAF_DISABLE_NOTIFY) { - kring = tx == NR_TX ? na->tx_rings : na->rx_rings; - bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings; - if (kring[ring_nr].nkr_stopped) - netmap_disable_ring(&bkring[ring_nr]); - else - bkring[ring_nr].nkr_stopped = 0; - return 0; - } - - if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP)) - return 0; - - /* we only care about receive interrupts */ - if (tx == NR_TX) - return 0; - - kring = &na->rx_rings[ring_nr]; - ring = kring->ring; - - /* make sure the ring is not disabled */ - if (nm_kr_tryget(kring)) - return 0; - - if (is_host_ring && hostna->na_bdg == NULL) { - error = bna->save_notify(na, ring_nr, tx, flags); - goto put_out; - } - - /* Here we expect ring->head = ring->cur = ring->tail - * because everything has been released from the previous round. - * However the ring is shared and we might have info from - * the wrong side (the tx ring). Hence we overwrite with - * the info from the rx kring. - */ - if (netmap_verbose) - D("%s head %d cur %d tail %d (kring %d %d %d)", NM_IFPNAME(ifp), - ring->head, ring->cur, ring->tail, - kring->rhead, kring->rcur, kring->rtail); - - ring->head = kring->rhead; - ring->cur = kring->rcur; - ring->tail = kring->rtail; - - if (is_host_ring) { - vpna = hostna; - ring_nr = 0; - } - /* simulate a user wakeup on the rx ring */ - /* fetch packets that have arrived. - * XXX maybe do this in a loop ? - */ - error = kring->nm_sync(kring, 0); - if (error) - goto put_out; - if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) { - D("how strange, interrupt with no packets on %s", - NM_IFPNAME(ifp)); - goto put_out; - } - - /* new packets are ring->cur to ring->tail, and the bkring - * had hwcur == ring->cur. So advance ring->cur to ring->tail - * to push all packets out. - */ - ring->head = ring->cur = ring->tail; - - /* also set tail to what the bwrap expects */ - bkring = &vpna->up.tx_rings[ring_nr]; - ring->tail = bkring->nr_hwtail; // rtail too ? 
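	/*
	 * Recap of the handoff (an illustrative summary using the names
	 * above): the rx netmap_ring is reused as the tx view of the VALE
	 * port, roughly
	 *
	 *	ring->head = ring->cur = ring->tail;	// expose the newly received slots
	 *	ring->tail = bkring->nr_hwtail;		// tail as the bwrap kring expects
	 *	nm_txsync_prologue(bkring);		// re-validate head/cur/tail
	 *	netmap_vp_txsync(bkring, flags);	// push the batch into the switch
	 *
	 * so the newly received slots go through nm_bdg_preflush() and
	 * nm_bdg_flush() in a single pass.
	 */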
- - /* pass packets to the switch */ - nm_txsync_prologue(bkring); // XXX error checking ? - netmap_vp_txsync(bkring, flags); - - /* mark all buffers as released on this ring */ - ring->head = ring->cur = kring->nr_hwtail; - ring->tail = kring->rtail; - /* another call to actually release the buffers */ - if (!is_host_ring) { - error = kring->nm_sync(kring, 0); - } else { - /* mark all packets as released, as in the - * second part of netmap_rxsync_from_host() - */ - kring->nr_hwcur = kring->nr_hwtail; - nm_rxsync_finalize(kring); - } - -put_out: - nm_kr_put(kring); - return error; -} - - -static int -netmap_bwrap_register(struct netmap_adapter *na, int onoff) -{ - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; - struct netmap_vp_adapter *hostna = &bna->host; - int error; - - ND("%s %s", NM_IFPNAME(na->ifp), onoff ? "on" : "off"); - - if (onoff) { - int i; - - hwna->na_lut = na->na_lut; - hwna->na_lut_objtotal = na->na_lut_objtotal; - - if (hostna->na_bdg) { - hostna->up.na_lut = na->na_lut; - hostna->up.na_lut_objtotal = na->na_lut_objtotal; - } - - /* cross-link the netmap rings - * The original number of rings comes from hwna, - * rx rings on one side equals tx rings on the other. - */ - for (i = 0; i < na->num_rx_rings + 1; i++) { - hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots; - hwna->tx_rings[i].ring = na->rx_rings[i].ring; - } - for (i = 0; i < na->num_tx_rings + 1; i++) { - hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots; - hwna->rx_rings[i].ring = na->tx_rings[i].ring; - } - } - - if (hwna->ifp) { - error = hwna->nm_register(hwna, onoff); - if (error) - return error; - } - - bdg_netmap_reg(na, onoff); - - if (onoff) { - bna->save_notify = hwna->nm_notify; - hwna->nm_notify = netmap_bwrap_intr_notify; - } else { - hwna->nm_notify = bna->save_notify; - hwna->na_lut = NULL; - hwna->na_lut_objtotal = 0; - } - - return 0; -} - - -static int -netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, - u_int *rxr, u_int *rxd) -{ - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; - - /* forward the request */ - netmap_update_config(hwna); - /* swap the results */ - *txr = hwna->num_rx_rings; - *txd = hwna->num_rx_desc; - *rxr = hwna->num_tx_rings; - *rxd = hwna->num_rx_desc; - - return 0; -} - - -static int -netmap_bwrap_krings_create(struct netmap_adapter *na) -{ - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; - struct netmap_adapter *hostna = &bna->host.up; - int error; - - ND("%s", NM_IFPNAME(na->ifp)); - - error = netmap_vp_krings_create(na); - if (error) - return error; - - error = hwna->nm_krings_create(hwna); - if (error) { - netmap_vp_krings_delete(na); - return error; - } - - if (na->na_flags & NAF_HOST_RINGS) { - /* the hostna rings are the host rings of the bwrap. 
- * The corresponding krings must point back to the - * hostna - */ - hostna->tx_rings = na->tx_rings + na->num_tx_rings; - hostna->tx_rings[0].na = hostna; - hostna->rx_rings = na->rx_rings + na->num_rx_rings; - hostna->rx_rings[0].na = hostna; - } - - return 0; -} - - -static void -netmap_bwrap_krings_delete(struct netmap_adapter *na) -{ - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; - - ND("%s", NM_IFPNAME(na->ifp)); - - hwna->nm_krings_delete(hwna); - netmap_vp_krings_delete(na); -} - - -/* notify method for the bridge-->hwna direction */ -static int -netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) -{ - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; - struct netmap_kring *kring, *hw_kring; - struct netmap_ring *ring; - u_int lim; - int error = 0; - - if (tx == NR_TX) - return EINVAL; - - kring = &na->rx_rings[ring_n]; - hw_kring = &hwna->tx_rings[ring_n]; - ring = kring->ring; - lim = kring->nkr_num_slots - 1; - - if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP)) - return 0; - mtx_lock(&kring->q_lock); - /* first step: simulate a user wakeup on the rx ring */ - netmap_vp_rxsync_locked(kring, flags); - ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", - NM_IFPNAME(na->ifp), ring_n, - kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, - ring->head, ring->cur, ring->tail, - hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); - /* second step: the simulated user consumes all new packets */ - ring->head = ring->cur = ring->tail; - - /* third step: the new packets are sent on the tx ring - * (which is actually the same ring) - */ - /* set tail to what the hw expects */ - ring->tail = hw_kring->rtail; - nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ? - error = hw_kring->nm_sync(hw_kring, flags); - - /* fourth step: now we are back the rx ring */ - /* claim ownership on all hw owned bufs */ - ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */ - ring->tail = kring->rtail; /* restore saved value of tail, for safety */ - - /* fifth step: the user goes to sleep again, causing another rxsync */ - netmap_vp_rxsync_locked(kring, flags); - ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", - NM_IFPNAME(na->ifp), ring_n, - kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, - ring->head, ring->cur, ring->tail, - hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); - mtx_unlock(&kring->q_lock); - return error; -} - - -static int -netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) -{ - struct netmap_bwrap_adapter *bna = na->na_private; - struct netmap_adapter *port_na = &bna->up.up; - if (tx == NR_TX || ring_n != 0) - return EINVAL; - return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags); -} - - -/* attach a bridge wrapper to the 'real' device */ -static int -netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real) -{ - struct netmap_bwrap_adapter *bna; - struct netmap_adapter *na; - struct netmap_adapter *hwna = NA(real); - struct netmap_adapter *hostna; - int error; - - - bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO); - if (bna == NULL) - return ENOMEM; - - na = &bna->up.up; - na->ifp = fake; - /* fill the ring data for the bwrap adapter with rx/tx meanings - * swapped. 
The real cross-linking will be done during register, - * when all the krings will have been created. - */ - na->num_rx_rings = hwna->num_tx_rings; - na->num_tx_rings = hwna->num_rx_rings; - na->num_tx_desc = hwna->num_rx_desc; - na->num_rx_desc = hwna->num_tx_desc; - na->nm_dtor = netmap_bwrap_dtor; - na->nm_register = netmap_bwrap_register; - // na->nm_txsync = netmap_bwrap_txsync; - // na->nm_rxsync = netmap_bwrap_rxsync; - na->nm_config = netmap_bwrap_config; - na->nm_krings_create = netmap_bwrap_krings_create; - na->nm_krings_delete = netmap_bwrap_krings_delete; - na->nm_notify = netmap_bwrap_notify; - na->nm_mem = hwna->nm_mem; - na->na_private = na; /* prevent NIOCREGIF */ - bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ - - bna->hwna = hwna; - netmap_adapter_get(hwna); - hwna->na_private = bna; /* weak reference */ - - if (hwna->na_flags & NAF_HOST_RINGS) { - na->na_flags |= NAF_HOST_RINGS; - hostna = &bna->host.up; - hostna->ifp = hwna->ifp; - hostna->num_tx_rings = 1; - hostna->num_tx_desc = hwna->num_rx_desc; - hostna->num_rx_rings = 1; - hostna->num_rx_desc = hwna->num_tx_desc; - // hostna->nm_txsync = netmap_bwrap_host_txsync; - // hostna->nm_rxsync = netmap_bwrap_host_rxsync; - hostna->nm_notify = netmap_bwrap_host_notify; - hostna->nm_mem = na->nm_mem; - hostna->na_private = bna; - } - - ND("%s<->%s txr %d txd %d rxr %d rxd %d", - fake->if_xname, real->if_xname, - na->num_tx_rings, na->num_tx_desc, - na->num_rx_rings, na->num_rx_desc); - - error = netmap_attach_common(na); - if (error) { - netmap_adapter_put(hwna); - free(bna, M_DEVBUF); - return error; - } - return 0; -} - - -void -netmap_init_bridges(void) -{ - int i; - bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */ - for (i = 0; i < NM_BRIDGES; i++) - BDG_RWINIT(&nm_bridges[i]); -} -#endif /* WITH_VALE */ diff --git a/netmap/sys/modules/netmap/Makefile b/netmap/sys/modules/netmap/Makefile deleted file mode 100644 index 647cd10..0000000 --- a/netmap/sys/modules/netmap/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# $FreeBSD$ -# -# Compile netmap as a module, useful if you want a netmap bridge -# or loadable drivers. - -.PATH: ${.CURDIR}/../../dev/netmap -.PATH.h: ${.CURDIR}/../../net -CFLAGS += -I${.CURDIR}/../../ -KMOD = netmap -SRCS = device_if.h bus_if.h opt_netmap.h -SRCS += netmap.c netmap.h netmap_kern.h -SRCS += netmap_mem2.c netmap_mem2.h -SRCS += netmap_generic.c -SRCS += netmap_mbq.c netmap_mbq.h -SRCS += netmap_vale.c -SRCS += netmap_freebsd.c -SRCS += netmap_offloadings.c -SRCS += netmap_pipe.c - -.include diff --git a/netmap/sys/net/netmap.h b/netmap/sys/net/netmap.h deleted file mode 100644 index 3344422..0000000 --- a/netmap/sys/net/netmap.h +++ /dev/null @@ -1,550 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``S IS''AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ - * - * Definitions of constants and the structures used by the netmap - * framework, for the part visible to both kernel and userspace. - * Detailed info on netmap is available with "man netmap" or at - * - * http://info.iet.unipi.it/~luigi/netmap/ - * - * This API is also used to communicate with the VALE software switch - */ - -#ifndef _NET_NETMAP_H_ -#define _NET_NETMAP_H_ - -#define NETMAP_API 11 /* current API version */ - -#define NETMAP_MIN_API 11 /* min and max versions accepted */ -#define NETMAP_MAX_API 15 -/* - * Some fields should be cache-aligned to reduce contention. - * The alignment is architecture and OS dependent, but rather than - * digging into OS headers to find the exact value we use an estimate - * that should cover most architectures. - */ -#define NM_CACHE_ALIGN 128 - -/* - * --- Netmap data structures --- - * - * The userspace data structures used by netmap are shown below. - * They are allocated by the kernel and mmap()ed by userspace threads. - * Pointers are implemented as memory offsets or indexes, - * so that they can be easily dereferenced in kernel and userspace. - - KERNEL (opaque, obviously) - - ==================================================================== - | - USERSPACE | struct netmap_ring - +---->+---------------+ - / | head,cur,tail | - struct netmap_if (nifp, 1 per fd) / | buf_ofs | - +---------------+ / | other fields | - | ni_tx_rings | / +===============+ - | ni_rx_rings | / | buf_idx, len | slot[0] - | | / | flags, ptr | - | | / +---------------+ - +===============+ / | buf_idx, len | slot[1] - | txring_ofs[0] | (rel.to nifp)--' | flags, ptr | - | txring_ofs[1] | +---------------+ - (tx+1 entries) (num_slots entries) - | txring_ofs[t] | | buf_idx, len | slot[n-1] - +---------------+ | flags, ptr | - | rxring_ofs[0] | +---------------+ - | rxring_ofs[1] | - (rx+1 entries) - | rxring_ofs[r] | - +---------------+ - - * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to - * a file descriptor, the mmap()ed region contains a (logically readonly) - * struct netmap_if pointing to struct netmap_ring's. - * - * There is one netmap_ring per physical NIC ring, plus one tx/rx ring - * pair attached to the host stack (this pair is unused for non-NIC ports). - * - * All physical/host stack ports share the same memory region, - * so that zero-copy can be implemented between them. - * VALE switch ports instead have separate memory regions. - * - * The netmap_ring is the userspace-visible replica of the NIC ring. - * Each slot has the index of a buffer (MTU-sized and residing in the - * mmapped region), its length and some flags. 
An extra 64-bit pointer - * is provided for user-supplied buffers in the tx path. - * - * In user space, the buffer address is computed as - * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE - * - * Added in NETMAP_API 11: - * - * + NIOCREGIF can request the allocation of extra spare buffers from - * the same memory pool. The desired number of buffers must be in - * nr_arg3. The ioctl may return fewer buffers, depending on memory - * availability. nr_arg3 will return the actual value, and, once - * mapped, nifp->ni_bufs_head will be the index of the first buffer. - * - * The buffers are linked to each other using the first uint32_t - * as the index. On close, ni_bufs_head must point to the list of - * buffers to be released. - * - * + NIOCREGIF can request space for extra rings (and buffers) - * allocated in the same memory space. The number of extra rings - * is in nr_arg1, and is advisory. This is a no-op on NICs where - * the size of the memory space is fixed. - * - * + NIOCREGIF can attach to PIPE rings sharing the same memory - * space with a parent device. The ifname indicates the parent device, - * which must already exist. Flags in nr_flags indicate if we want to - * bind the master or slave side, the index (from nr_ringid) - * is just a cookie and does need to be sequential. - * - * + NIOCREGIF can also attach to 'monitor' rings that replicate - * the content of specific rings, also from the same memory space. - * - * Extra flags in nr_flags support the above functions. - * Application libraries may use the following naming scheme: - * netmap:foo all NIC ring pairs - * netmap:foo^ only host ring pair - * netmap:foo+ all NIC ring + host ring pairs - * netmap:foo-k the k-th NIC ring pair - * netmap:foo{k PIPE ring pair k, master side - * netmap:foo}k PIPE ring pair k, slave side - */ - -/* - * struct netmap_slot is a buffer descriptor - */ -struct netmap_slot { - uint32_t buf_idx; /* buffer index */ - uint16_t len; /* length for this slot */ - uint16_t flags; /* buf changed, etc. */ - uint64_t ptr; /* pointer for indirect buffers */ -}; - -/* - * The following flags control how the slot is used - */ - -#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ - /* - * must be set whenever buf_idx is changed (as it might be - * necessary to recompute the physical address and mapping) - */ - -#define NS_REPORT 0x0002 /* ask the hardware to report results */ - /* - * Request notification when slot is used by the hardware. - * Normally transmit completions are handled lazily and - * may be unreported. This flag lets us know when a slot - * has been sent (e.g. to terminate the sender). - */ - -#define NS_FORWARD 0x0004 /* pass packet 'forward' */ - /* - * (Only for physical ports, rx rings with NR_FORWARD set). - * Slot released to the kernel (i.e. before ring->head) with - * this flag set are passed to the peer ring (host/NIC), - * thus restoring the host-NIC connection for these slots. - * This supports efficient traffic monitoring or firewalling. - */ - -#define NS_NO_LEARN 0x0008 /* disable bridge learning */ - /* - * On a VALE switch, do not 'learn' the source port for - * this buffer. - */ - -#define NS_INDIRECT 0x0010 /* userspace buffer */ - /* - * (VALE tx rings only) data is in a userspace buffer, - * whose address is in the 'ptr' field in the slot. - */ - -#define NS_MOREFRAG 0x0020 /* packet has more fragments */ - /* - * (VALE ports only) - * Set on all but the last slot of a multi-segment packet. - * The 'len' field refers to the individual fragment. 
- */ - -#define NS_PORT_SHIFT 8 -#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) - /* - * The high 8 bits of the flag, if not zero, indicate the - * destination port for the VALE switch, overriding - * the lookup table. - */ - -#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) - /* - * (VALE rx rings only) the high 8 bits - * are the number of fragments. - */ - - -/* - * struct netmap_ring - * - * Netmap representation of a TX or RX ring (also known as "queue"). - * This is a queue implemented as a fixed-size circular array. - * At the software level the important fields are: head, cur, tail. - * - * In TX rings: - * - * head first slot available for transmission. - * cur wakeup point. select() and poll() will unblock - * when 'tail' moves past 'cur' - * tail (readonly) first slot reserved to the kernel - * - * [head .. tail-1] can be used for new packets to send; - * 'head' and 'cur' must be incremented as slots are filled - * with new packets to be sent; - * 'cur' can be moved further ahead if we need more space - * for new transmissions. - * - * In RX rings: - * - * head first valid received packet - * cur wakeup point. select() and poll() will unblock - * when 'tail' moves past 'cur' - * tail (readonly) first slot reserved to the kernel - * - * [head .. tail-1] contain received packets; - * 'head' and 'cur' must be incremented as slots are consumed - * and can be returned to the kernel; - * 'cur' can be moved further ahead if we want to wait for - * new packets without returning the previous ones. - * - * DATA OWNERSHIP/LOCKING: - * The netmap_ring, and all slots and buffers in the range - * [head .. tail-1] are owned by the user program; - * the kernel only accesses them during a netmap system call - * and in the user thread context. - * - * Other slots and buffers are reserved for use by the kernel - */ -struct netmap_ring { - /* - * buf_ofs is meant to be used through macros. - * It contains the offset of the buffer region from this - * descriptor. - */ - const int64_t buf_ofs; - const uint32_t num_slots; /* number of slots in the ring. */ - const uint32_t nr_buf_size; - const uint16_t ringid; - const uint16_t dir; /* 0: tx, 1: rx */ - - uint32_t head; /* (u) first user slot */ - uint32_t cur; /* (u) wakeup point */ - uint32_t tail; /* (k) first kernel slot */ - - uint32_t flags; - - struct timeval ts; /* (k) time of last *sync() */ - - /* opaque room for a mutex or similar object */ - uint8_t sem[128] __attribute__((__aligned__(NM_CACHE_ALIGN))); - - /* the slots follow. This struct has variable size */ - struct netmap_slot slot[0]; /* array of slots. */ -}; - - -/* - * RING FLAGS - */ -#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ - /* - * updates the 'ts' field on each netmap syscall. This saves - * saves a separate gettimeofday(), and is not much worse than - * software timestamps generated in the interrupt handler. - */ - -#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */ - /* - * Enables the NS_FORWARD slot flag for the ring. - */ - - -/* - * Netmap representation of an interface and its queue(s). - * This is initialized by the kernel when binding a file - * descriptor to a port, and should be considered as readonly - * by user programs. The kernel never uses it. - * - * There is one netmap_if for each file descriptor on which we want - * to select/poll. - * select/poll operates on one or all pairs depending on the value of - * nmr_queueid passed on the ioctl. - */ -struct netmap_if { - char ni_name[IFNAMSIZ]; /* name of the interface. 
*/ - const uint32_t ni_version; /* API version, currently unused */ - const uint32_t ni_flags; /* properties */ -#define NI_PRIV_MEM 0x1 /* private memory region */ - - /* - * The number of packet rings available in netmap mode. - * Physical NICs can have different numbers of tx and rx rings. - * Physical NICs also have a 'host' ring pair. - * Additionally, clients can request additional ring pairs to - * be used for internal communication. - */ - const uint32_t ni_tx_rings; /* number of HW tx rings */ - const uint32_t ni_rx_rings; /* number of HW rx rings */ - - uint32_t ni_bufs_head; /* head index for extra bufs */ - uint32_t ni_spare1[5]; - /* - * The following array contains the offset of each netmap ring - * from this structure, in the following order: - * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; - * NIC rx rings (ni_rx_rings); host tx ring (1); extra rx rings. - * - * The area is filled up by the kernel on NIOCREGIF, - * and then only read by userspace code. - */ - const ssize_t ring_ofs[0]; -}; - - -#ifndef NIOCREGIF -/* - * ioctl names and related fields - * - * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, - * whose identity is set in NIOCREGIF through nr_ringid. - * These are non blocking and take no argument. - * - * NIOCGINFO takes a struct ifreq, the interface name is the input, - * the outputs are number of queues and number of descriptor - * for each queue (useful to set number of threads etc.). - * The info returned is only advisory and may change before - * the interface is bound to a file descriptor. - * - * NIOCREGIF takes an interface name within a struct nmre, - * and activates netmap mode on the interface (if possible). - * - * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we - * can pass it down to other NIC-related ioctls. - * - * The actual argument (struct nmreq) has a number of options to request - * different functions. - * The following are used in NIOCREGIF when nr_cmd == 0: - * - * nr_name (in) - * The name of the port (em0, valeXXX:YYY, etc.) - * limited to IFNAMSIZ for backward compatibility. - * - * nr_version (in/out) - * Must match NETMAP_API as used in the kernel, error otherwise. - * Always returns the desired value on output. - * - * nr_tx_slots, nr_tx_slots, nr_tx_rings, nr_rx_rings (in/out) - * On input, non-zero values may be used to reconfigure the port - * according to the requested values, but this is not guaranteed. - * On output the actual values in use are reported. - * - * nr_ringid (in) - * Indicates how rings should be bound to the file descriptors. - * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK) - * are used to indicate the ring number, and nr_flags specifies - * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected. - * - * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: - * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control - * the binding as follows: - * 0 (default) binds all physical rings - * NETMAP_HW_RING | ring number binds a single ring pair - * NETMAP_SW_RING binds only the host tx/rx rings - * - * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push - * packets on tx rings only if POLLOUT is set. - * The default is to push any pending packet. - * - * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release - * packets on rx rings also when POLLIN is NOT set. - * The default is to touch the rx ring only with POLLIN. - * Note that this is the opposite of TX because it - * reflects the common usage. 
- * - * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. - * NETMAP_PRIV_MEM is set on return for ports that do not use - * the global memory allocator. - * This information is not significant and applications - * should look at the region id in nr_arg2 - * - * nr_flags is the recommended mode to indicate which rings should - * be bound to a file descriptor. Values are NR_REG_* - * - * nr_arg1 (in) The number of extra rings to be reserved. - * Especially when allocating a VALE port the system only - * allocates the amount of memory needed for the port. - * If more shared memory rings are desired (e.g. for pipes), - * the first invocation for the same basename/allocator - * should specify a suitable number. Memory cannot be - * extended after the first allocation without closing - * all ports on the same region. - * - * nr_arg2 (in/out) The identity of the memory region used. - * On input, 0 means the system decides autonomously, - * other values may try to select a specific region. - * On return the actual value is reported. - * Region '1' is the global allocator, normally shared - * by all interfaces. Other values are private regions. - * If two ports the same region zero-copy is possible. - * - * nr_arg3 (in/out) number of extra buffers to be allocated. - * - * - * - * nr_cmd (in) if non-zero indicates a special command: - * NETMAP_BDG_ATTACH and nr_name = vale*:ifname - * attaches the NIC to the switch; nr_ringid specifies - * which rings to use. Used by vale-ctl -a ... - * nr_arg1 = NETMAP_BDG_HOST also attaches the host port - * as in vale-ctl -h ... - * - * NETMAP_BDG_DETACH and nr_name = vale*:ifname - * disconnects a previously attached NIC. - * Used by vale-ctl -d ... - * - * NETMAP_BDG_LIST - * list the configuration of VALE switches. - * - * NETMAP_BDG_VNET_HDR - * Set the virtio-net header length used by the client - * of a VALE switch port. - * - * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific - * - * - * - */ - - -/* - * struct nmreq overlays a struct ifreq (just the name) - * - * On input, nr_ringid indicates which rings we are requesting, - * with the low flags for the specific ring number. 
- * selection FLAGS RING INDEX - * - * all the NIC rings 0x0000 - - * only HOST ring 0x2000 - - * single NIC ring 0x4000 ring index - * all the NIC+HOST rings 0x6000 - - * one pipe ring, master 0x8000 ring index - * *** INVALID 0xA000 - * one pipe ring, slave 0xC000 ring index - * *** INVALID 0xE000 - * - */ -struct nmreq { - char nr_name[IFNAMSIZ]; - uint32_t nr_version; /* API version */ - uint32_t nr_offset; /* nifp offset in the shared region */ - uint32_t nr_memsize; /* size of the shared region */ - uint32_t nr_tx_slots; /* slots in tx rings */ - uint32_t nr_rx_slots; /* slots in rx rings */ - uint16_t nr_tx_rings; /* number of tx rings */ - uint16_t nr_rx_rings; /* number of rx rings */ - - uint16_t nr_ringid; /* ring(s) we care about */ -#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */ -#define NETMAP_SW_RING 0x2000 /* only host ring pair */ - -#define NETMAP_RING_MASK 0x0fff /* the ring number */ - -#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ - -#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */ - - uint16_t nr_cmd; -#define NETMAP_BDG_ATTACH 1 /* attach the NIC */ -#define NETMAP_BDG_DETACH 2 /* detach the NIC */ -#define NETMAP_BDG_LOOKUP_REG 3 /* register lookup function */ -#define NETMAP_BDG_LIST 4 /* get bridge's info */ -#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */ -#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ - - uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ -#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ - - uint16_t nr_arg2; - uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */ - uint32_t nr_flags; - /* various modes, extends nr_ringid */ - uint32_t spare2[1]; -}; - -#define NR_REG_MASK 0xf /* values for nr_flags */ -enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ - NR_REG_ALL_NIC = 1, - NR_REG_SW = 2, - NR_REG_NIC_SW = 3, - NR_REG_ONE_NIC = 4, - NR_REG_PIPE_MASTER = 5, - NR_REG_PIPE_SLAVE = 6, -}; -/* monitor uses the NR_REG to select the rings to monitor */ -#define NR_MONITOR_TX 0x100 -#define NR_MONITOR_RX 0x200 - - -/* - * FreeBSD uses the size value embedded in the _IOWR to determine - * how much to copy in/out. So we need it to match the actual - * data structure we pass. We put some spares in the structure - * to ease compatibility with other versions - */ -#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ -#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ -#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ -#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ -#endif /* !NIOCREGIF */ - - -/* - * Helper functions for kernel and userspace - */ - -/* - * check if space is available in the ring. - */ -static inline int -nm_ring_empty(struct netmap_ring *ring) -{ - return (ring->cur == ring->tail); -} - -#endif /* _NET_NETMAP_H_ */ diff --git a/netmap/sys/net/netmap_user.h b/netmap/sys/net/netmap_user.h deleted file mode 100644 index c9695c2..0000000 --- a/netmap/sys/net/netmap_user.h +++ /dev/null @@ -1,677 +0,0 @@ -/* - * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - * - * Functions and macros to manipulate netmap structures and packets - * in userspace. See netmap(4) for more information. - * - * The address of the struct netmap_if, say nifp, is computed from the - * value returned from ioctl(.., NIOCREG, ...) and the mmap region: - * ioctl(fd, NIOCREG, &req); - * mem = mmap(0, ... ); - * nifp = NETMAP_IF(mem, req.nr_nifp); - * (so simple, we could just do it manually) - * - * From there: - * struct netmap_ring *NETMAP_TXRING(nifp, index) - * struct netmap_ring *NETMAP_RXRING(nifp, index) - * we can access ring->nr_cur, ring->nr_avail, ring->nr_flags - * - * ring->slot[i] gives us the i-th slot (we can access - * directly len, flags, buf_idx) - * - * char *buf = NETMAP_BUF(ring, x) returns a pointer to - * the buffer numbered x - * - * All ring indexes (head, cur, tail) should always move forward. - * To compute the next index in a circular ring you can use - * i = nm_ring_next(ring, i); - * - * To ease porting apps from pcap to netmap we supply a few fuctions - * that can be called to open, close, read and write on netmap in a way - * similar to libpcap. Note that the read/write function depend on - * an ioctl()/select()/poll() being issued to refill rings or push - * packets out. - * - * In order to use these, include #define NETMAP_WITH_LIBS - * in the source file that invokes these functions. - */ - -#ifndef _NET_NETMAP_USER_H_ -#define _NET_NETMAP_USER_H_ - -#include -#include /* apple needs sockaddr */ -#include /* IFNAMSIZ */ - -#ifndef likely -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) -#endif /* likely and unlikely */ - -#include - -/* helper macro */ -#define _NETMAP_OFFSET(type, ptr, offset) \ - ((type)(void *)((char *)(ptr) + (offset))) - -#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) - -#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ - nifp, (nifp)->ring_ofs[index] ) - -#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ - nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) - -#define NETMAP_BUF(ring, index) \ - ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) - -#define NETMAP_BUF_IDX(ring, buf) \ - ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ - (ring)->nr_buf_size ) - - -static inline uint32_t -nm_ring_next(struct netmap_ring *r, uint32_t i) -{ - return ( unlikely(i + 1 == r->num_slots) ? 
0 : i + 1); -} - - -/* - * Return 1 if we have pending transmissions in the tx ring. - * When everything is complete ring->head = ring->tail + 1 (modulo ring size) - */ -static inline int -nm_tx_pending(struct netmap_ring *r) -{ - return nm_ring_next(r, r->tail) != r->head; -} - - -static inline uint32_t -nm_ring_space(struct netmap_ring *ring) -{ - int ret = ring->tail - ring->cur; - if (ret < 0) - ret += ring->num_slots; - return ret; -} - - -#ifdef NETMAP_WITH_LIBS -/* - * Support for simple I/O libraries. - * Include other system headers required for compiling this. - */ - -#ifndef HAVE_NETMAP_WITH_LIBS -#define HAVE_NETMAP_WITH_LIBS - -#include -#include -#include /* memset */ -#include -#include /* EINVAL */ -#include /* O_RDWR */ -#include /* close() */ -#include -#include - -#ifndef ND /* debug macros */ -/* debug support */ -#define ND(_fmt, ...) do {} while(0) -#define D(_fmt, ...) \ - do { \ - struct timeval t0; \ - gettimeofday(&t0, NULL); \ - fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ - (int)(t0.tv_sec % 1000), (int)t0.tv_usec, \ - __FUNCTION__, __LINE__, ##__VA_ARGS__); \ - } while (0) - -/* Rate limited version of "D", lps indicates how many per second */ -#define RD(lps, format, ...) \ - do { \ - static int t0, __cnt; \ - struct timeval __xxts; \ - gettimeofday(&__xxts, NULL); \ - if (t0 != __xxts.tv_sec) { \ - t0 = __xxts.tv_sec; \ - __cnt = 0; \ - } \ - if (__cnt++ < lps) { \ - D(format, ##__VA_ARGS__); \ - } \ - } while (0) -#endif - -struct nm_pkthdr { /* same as pcap_pkthdr */ - struct timeval ts; - uint32_t caplen; - uint32_t len; -}; - -struct nm_stat { /* same as pcap_stat */ - u_int ps_recv; - u_int ps_drop; - u_int ps_ifdrop; -#ifdef WIN32 - u_int bs_capt; -#endif /* WIN32 */ -}; - -#define NM_ERRBUF_SIZE 512 - -struct nm_desc { - struct nm_desc *self; /* point to self if netmap. */ - int fd; - void *mem; - int memsize; - int done_mmap; /* set if mem is the result of mmap */ - struct netmap_if * const nifp; - uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; - uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; - struct nmreq req; /* also contains the nr_name = ifname */ - struct nm_pkthdr hdr; - - /* - * The memory contains netmap_if, rings and then buffers. - * Given a pointer (e.g. to nm_inject) we can compare with - * mem/buf_start/buf_end to tell if it is a buffer or - * some other descriptor in our region. - * We also store a pointer to some ring as it helps in the - * translation from buffer indexes to addresses. - */ - struct netmap_ring * const some_ring; - void * const buf_start; - void * const buf_end; - /* parameters from pcap_open_live */ - int snaplen; - int promisc; - int to_ms; - char *errbuf; - - /* save flags so we can restore them on close */ - uint32_t if_flags; - uint32_t if_reqcap; - uint32_t if_curcap; - - struct nm_stat st; - char msg[NM_ERRBUF_SIZE]; -}; - -/* - * when the descriptor is open correctly, d->self == d - * Eventually we should also use some magic number. - */ -#define P2NMD(p) ((struct nm_desc *)(p)) -#define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) -#define NETMAP_FD(d) (P2NMD(d)->fd) - - -/* - * this is a slightly optimized copy routine which rounds - * to multiple of 64 bytes and is often faster than dealing - * with other odd sizes. We assume there is enough room - * in the source and destination buffers. - * - * XXX only for multiples of 64 bytes, non overlapped. 
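 *
 * An illustrative use together with the ring helpers above, assuming a TX
 * ring obtained via NETMAP_TXRING() and a frame length that is a multiple
 * of 64 bytes ("frame" and "len" are placeholder names, not part of this
 * header):
 *
 *	if (nm_ring_space(ring) > 0) {
 *		uint32_t i = ring->cur;
 *		char *dst = NETMAP_BUF(ring, ring->slot[i].buf_idx);
 *
 *		nm_pkt_copy(frame, dst, len);
 *		ring->slot[i].len = len;
 *		ring->head = ring->cur = nm_ring_next(ring, i);
 *	}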
- */ -static inline void -nm_pkt_copy(const void *_src, void *_dst, int l) -{ - const uint64_t *src = (const uint64_t *)_src; - uint64_t *dst = (uint64_t *)_dst; - - if (unlikely(l >= 1024)) { - memcpy(dst, src, l); - return; - } - for (; likely(l > 0); l-=64) { - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } -} - - -/* - * The callback, invoked on each received packet. Same as libpcap - */ -typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); - -/* - *--- the pcap-like API --- - * - * nm_open() opens a file descriptor, binds to a port and maps memory. - * - * ifname (netmap:foo or vale:foo) is the port name - * a suffix can indicate the follwing: - * ^ bind the host (sw) ring pair - * * bind host and NIC ring pairs (transparent) - * -NN bind individual NIC ring pair - * {NN bind master side of pipe NN - * }NN bind slave side of pipe NN - * - * req provides the initial values of nmreq before parsing ifname. - * Remember that the ifname parsing will override the ring - * number in nm_ringid, and part of nm_flags; - * flags special functions, normally 0 - * indicates which fields of *arg are significant - * arg special functions, normally NULL - * if passed a netmap_desc with mem != NULL, - * use that memory instead of mmap. - */ - -static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, - uint64_t flags, const struct nm_desc *arg); - -/* - * nm_open can import some fields from the parent descriptor. - * These flags control which ones. - * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, - * which set the initial value for these flags. - * Note that the 16 low bits of the flags are reserved for data - * that may go into the nmreq. - */ -enum { - NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ - NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ - NM_OPEN_ARG1 = 0x100000, - NM_OPEN_ARG2 = 0x200000, - NM_OPEN_ARG3 = 0x400000, - NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ -}; - - -/* - * nm_close() closes and restores the port to its previous state - */ - -static int nm_close(struct nm_desc *); - -/* - * nm_inject() is the same as pcap_inject() - * nm_dispatch() is the same as pcap_dispatch() - * nm_nextpkt() is the same as pcap_next() - */ - -static int nm_inject(struct nm_desc *, const void *, size_t); -static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); -static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); - - -/* - * Try to open, return descriptor if successful, NULL otherwise. - * An invalid netmap name will return errno = 0; - * You can pass a pointer to a pre-filled nm_desc to add special - * parameters. Flags is used as follows - * NM_OPEN_NO_MMAP use the memory from arg, only - * if the nr_arg2 (memory block) matches. 
- * NM_OPEN_ARG1 use req.nr_arg1 from arg - * NM_OPEN_ARG2 use req.nr_arg2 from arg - * NM_OPEN_RING_CFG user ring config from arg - */ -static struct nm_desc * -nm_open(const char *ifname, const struct nmreq *req, - uint64_t new_flags, const struct nm_desc *arg) -{ - struct nm_desc *d = NULL; - const struct nm_desc *parent = arg; - u_int namelen; - uint32_t nr_ringid = 0, nr_flags; - const char *port = NULL; - const char *errmsg = NULL; - - if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { - errno = 0; /* name not recognised, not an error */ - return NULL; - } - if (ifname[0] == 'n') - ifname += 7; - /* scan for a separator */ - for (port = ifname; *port && !index("-*^{}", *port); port++) - ; - namelen = port - ifname; - if (namelen >= sizeof(d->req.nr_name)) { - errmsg = "name too long"; - goto fail; - } - switch (*port) { - default: /* '\0', no suffix */ - nr_flags = NR_REG_ALL_NIC; - break; - case '-': /* one NIC */ - nr_flags = NR_REG_ONE_NIC; - nr_ringid = atoi(port + 1); - break; - case '*': /* NIC and SW, ignore port */ - nr_flags = NR_REG_NIC_SW; - if (port[1]) { - errmsg = "invalid port for nic+sw"; - goto fail; - } - break; - case '^': /* only sw ring */ - nr_flags = NR_REG_SW; - if (port[1]) { - errmsg = "invalid port for sw ring"; - goto fail; - } - break; - case '{': - nr_flags = NR_REG_PIPE_MASTER; - nr_ringid = atoi(port + 1); - break; - case '}': - nr_flags = NR_REG_PIPE_SLAVE; - nr_ringid = atoi(port + 1); - break; - } - - if (nr_ringid >= NETMAP_RING_MASK) { - errmsg = "invalid ringid"; - goto fail; - } - /* add the *XPOLL flags */ - nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); - - d = (struct nm_desc *)calloc(1, sizeof(*d)); - if (d == NULL) { - errmsg = "nm_desc alloc failure"; - errno = ENOMEM; - return NULL; - } - d->self = d; /* set this early so nm_close() works */ - d->fd = open("/dev/netmap", O_RDWR); - if (d->fd < 0) { - errmsg = "cannot open /dev/netmap"; - goto fail; - } - - if (req) - d->req = *req; - d->req.nr_version = NETMAP_API; - d->req.nr_ringid &= ~NETMAP_RING_MASK; - - /* these fields are overridden by ifname and flags processing */ - d->req.nr_ringid |= nr_ringid; - d->req.nr_flags = nr_flags; - memcpy(d->req.nr_name, ifname, namelen); - d->req.nr_name[namelen] = '\0'; - /* optionally import info from parent */ - if (IS_NETMAP_DESC(parent) && new_flags) { - if (new_flags & NM_OPEN_ARG1) - D("overriding ARG1 %d", parent->req.nr_arg1); - d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? - parent->req.nr_arg1 : 4; - if (new_flags & NM_OPEN_ARG2) - D("overriding ARG2 %d", parent->req.nr_arg2); - d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ? - parent->req.nr_arg2 : 0; - if (new_flags & NM_OPEN_ARG3) - D("overriding ARG3 %d", parent->req.nr_arg3); - d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 
- parent->req.nr_arg3 : 0; - if (new_flags & NM_OPEN_RING_CFG) { - D("overriding RING_CFG"); - d->req.nr_tx_slots = parent->req.nr_tx_slots; - d->req.nr_rx_slots = parent->req.nr_rx_slots; - d->req.nr_tx_rings = parent->req.nr_tx_rings; - d->req.nr_rx_rings = parent->req.nr_rx_rings; - } - if (new_flags & NM_OPEN_IFNAME) { - D("overriding ifname %s ringid 0x%x flags 0x%x", - parent->req.nr_name, parent->req.nr_ringid, - parent->req.nr_flags); - memcpy(d->req.nr_name, parent->req.nr_name, - sizeof(d->req.nr_name)); - d->req.nr_ringid = parent->req.nr_ringid; - d->req.nr_flags = parent->req.nr_flags; - } - } - if (ioctl(d->fd, NIOCREGIF, &d->req)) { - errmsg = "NIOCREGIF failed"; - goto fail; - } - - if (IS_NETMAP_DESC(parent) && parent->mem && - parent->req.nr_arg2 == d->req.nr_arg2) { - /* do not mmap, inherit from parent */ - d->memsize = parent->memsize; - d->mem = parent->mem; - } else { - d->memsize = d->req.nr_memsize; - d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, - d->fd, 0); - if (d->mem == NULL) { - errmsg = "mmap failed"; - goto fail; - } - d->done_mmap = 1; - } - { - struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); - struct netmap_ring *r = NETMAP_RXRING(nifp, ); - - *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; - *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; - *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); - *(void **)(uintptr_t)&d->buf_end = - (char *)d->mem + d->memsize; - } - - if (nr_flags == NR_REG_SW) { /* host stack */ - d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; - d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; - } else if (nr_flags == NR_REG_ALL_NIC) { /* only nic */ - d->first_tx_ring = 0; - d->first_rx_ring = 0; - d->last_tx_ring = d->req.nr_tx_rings - 1; - d->last_rx_ring = d->req.nr_rx_rings - 1; - } else if (nr_flags == NR_REG_NIC_SW) { - d->first_tx_ring = 0; - d->first_rx_ring = 0; - d->last_tx_ring = d->req.nr_tx_rings; - d->last_rx_ring = d->req.nr_rx_rings; - } else if (nr_flags == NR_REG_ONE_NIC) { - /* XXX check validity */ - d->first_tx_ring = d->last_tx_ring = - d->first_rx_ring = d->last_rx_ring = nr_ringid; - } else { /* pipes */ - d->first_tx_ring = d->last_tx_ring = 0; - d->first_rx_ring = d->last_rx_ring = 0; - } - -#ifdef DEBUG_NETMAP_USER - { /* debugging code */ - int i; - - D("%s tx %d .. %d %d rx %d .. %d %d", ifname, - d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, - d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); - for (i = 0; i <= d->req.nr_tx_rings; i++) { - struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); - D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); - } - for (i = 0; i <= d->req.nr_rx_rings; i++) { - struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); - D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); - } - } -#endif /* debugging */ - - d->cur_tx_ring = d->first_tx_ring; - d->cur_rx_ring = d->first_rx_ring; - return d; - -fail: - nm_close(d); - if (errmsg) - D("%s %s", errmsg, ifname); - errno = EINVAL; - return NULL; -} - - -static int -nm_close(struct nm_desc *d) -{ - /* - * ugly trick to avoid unused warnings - */ - static void *__xxzt[] __attribute__ ((unused)) = - { (void *)nm_open, (void *)nm_inject, - (void *)nm_dispatch, (void *)nm_nextpkt } ; - - if (d == NULL || d->self != d) - return EINVAL; - if (d->done_mmap && d->mem) - munmap(d->mem, d->memsize); - if (d->fd != -1) - close(d->fd); - bzero(d, sizeof(*d)); - free(d); - return 0; -} - - -/* - * Same prototype as pcap_inject(), only need to cast. 
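 *
 * A minimal calling sequence for these pcap-like wrappers might look as
 * follows (sketch only: "pkt", "pktlen" and "my_cb" are placeholders, error
 * handling is omitted, and a poll()/ioctl() on the descriptor is still
 * needed to sync the rings, as noted earlier):
 *
 *	struct nm_desc *d = nm_open("netmap:eth0", NULL, 0, NULL);
 *	if (d != NULL) {
 *		nm_inject(d, pkt, pktlen);
 *		nm_dispatch(d, 16, my_cb, NULL);
 *		nm_close(d);
 *	}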
- */ -static int -nm_inject(struct nm_desc *d, const void *buf, size_t size) -{ - u_int c, n = d->last_tx_ring - d->first_tx_ring + 1; - - for (c = 0; c < n ; c++) { - /* compute current ring to use */ - struct netmap_ring *ring; - uint32_t i, idx; - uint32_t ri = d->cur_tx_ring + c; - - if (ri > d->last_tx_ring) - ri = d->first_tx_ring; - ring = NETMAP_TXRING(d->nifp, ri); - if (nm_ring_empty(ring)) { - continue; - } - i = ring->cur; - idx = ring->slot[i].buf_idx; - ring->slot[i].len = size; - nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); - d->cur_tx_ring = ri; - ring->head = ring->cur = nm_ring_next(ring, i); - return size; - } - return 0; /* fail */ -} - - -/* - * Same prototype as pcap_dispatch(), only need to cast. - */ -static int -nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) -{ - int n = d->last_rx_ring - d->first_rx_ring + 1; - int c, got = 0, ri = d->cur_rx_ring; - - if (cnt == 0) - cnt = -1; - /* cnt == -1 means infinite, but rings have a finite amount - * of buffers and the int is large enough that we never wrap, - * so we can omit checking for -1 - */ - for (c=0; c < n && cnt != got; c++) { - /* compute current ring to use */ - struct netmap_ring *ring; - - ri = d->cur_rx_ring + c; - if (ri > d->last_rx_ring) - ri = d->first_rx_ring; - ring = NETMAP_RXRING(d->nifp, ri); - for ( ; !nm_ring_empty(ring) && cnt != got; got++) { - u_int i = ring->cur; - u_int idx = ring->slot[i].buf_idx; - u_char *buf = (u_char *)NETMAP_BUF(ring, idx); - - // __builtin_prefetch(buf); - d->hdr.len = d->hdr.caplen = ring->slot[i].len; - d->hdr.ts = ring->ts; - cb(arg, &d->hdr, buf); - ring->head = ring->cur = nm_ring_next(ring, i); - } - } - d->cur_rx_ring = ri; - return got; -} - -static u_char * -nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) -{ - int ri = d->cur_rx_ring; - - do { - /* compute current ring to use */ - struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); - if (!nm_ring_empty(ring)) { - u_int i = ring->cur; - u_int idx = ring->slot[i].buf_idx; - u_char *buf = (u_char *)NETMAP_BUF(ring, idx); - - // __builtin_prefetch(buf); - hdr->ts = ring->ts; - hdr->len = hdr->caplen = ring->slot[i].len; - ring->cur = nm_ring_next(ring, i); - /* we could postpone advancing head if we want - * to hold the buffer. This can be supported in - * the future. - */ - ring->head = ring->cur; - d->cur_rx_ring = ri; - return buf; - } - ri++; - if (ri > d->last_rx_ring) - ri = d->first_rx_ring; - } while (ri != d->cur_rx_ring); - return NULL; /* nothing found */ -} - -#endif /* !HAVE_NETMAP_WITH_LIBS */ - -#endif /* NETMAP_WITH_LIBS */ - -#endif /* _NET_NETMAP_USER_H_ */
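
For reference, a small self-contained sketch (not part of the original sources) of how the removed pcap-like helpers fit together in a receive loop; the interface name "netmap:eth0", the 1-second poll timeout and the empty packet handler are illustrative placeholders:

	#define NETMAP_WITH_LIBS
	#include <net/netmap_user.h>
	#include <poll.h>

	static void
	rx_loop(void)
	{
		struct nm_desc *d = nm_open("netmap:eth0", NULL, 0, NULL);
		struct nm_pkthdr h;
		struct pollfd pfd;
		u_char *buf;

		if (d == NULL)
			return;
		pfd.fd = NETMAP_FD(d);
		pfd.events = POLLIN;
		for (;;) {
			poll(&pfd, 1, 1000);	/* refill the rx rings */
			while ((buf = nm_nextpkt(d, &h)) != NULL) {
				/* process h.len bytes starting at buf */
			}
		}
		/* not reached; nm_close(d) would restore the interface state */
	}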