remove unneeded netmap code

Babak Farrokhi 2016-04-06 13:46:22 +04:30
parent c1ac6d9f38
commit cbeffe8f87
92 changed files with 0 additions and 31228 deletions

View File

@@ -1,169 +0,0 @@
# To build external modules, you must have a prebuilt kernel available
# that contains the configuration and header files used in the build.
# go in the kernel directory and do a
# make oldconfig; make scripts; make prepare
# or make defconfig; make scripts; make prepare
#
# list of objects for this module
#
# objects whose source file is in ../sys/dev/netmap
remoteobjs := netmap.o netmap_mem2.o \
netmap_generic.o netmap_mbq.o netmap_vale.o \
netmap_offloadings.o netmap_pipe.o
# all objects
netmap_lin-objs := $(remoteobjs) netmap_linux.o
obj-$(CONFIG_NETMAP) = netmap_lin.o
ifndef NODRIVERS
# list of modules to be built (actually also forcedeth and r8169)
MOD_LIST:= CONFIG_E1000=m CONFIG_E1000E=m \
CONFIG_IXGBE=m CONFIG_IGB=m \
CONFIG_BNX2X=m CONFIG_MLX4=m \
CONFIG_VIRTIO_NET=m
obj-m += $(O_DRIVERS)
GET_DRIVERS := get-drivers
else
MOD_LIST:=
endif
# DRIVER_SRCS lists the names of the driver sources; it is only used
# to clean the files that we copied.
DRIVER_SRCS = r8169.c forcedeth.c e1000/ e1000e/ ixgbe/ igb/
DRIVER_SRCS += bnx2x/ mellanox/ mlx4/ virtio_net.c
# _DRV_SUBDIRS contains the subdirs with driver sources.
# In older kernels everything is under drivers/net; newer versions
# have them in source/drivers/net/ethernet/$(manufacturer)
_DRV_SUBDIRS= nvidia realtek intel broadcom . ..
# The following variables are needed to build the modules out-of-tree,
# since the path to the kernel sources must be specified.
PWD ?= $(CURDIR)
M:=$(PWD)
# Additional compile flags (e.g. header location)
EXTRA_CFLAGS := -I$(M) -I$(M)/../sys -I$(M)/../sys/dev -DCONFIG_NETMAP
EXTRA_CFLAGS += -Wno-unused-but-set-variable
# We use KSRC for the kernel configuration and sources.
# If the sources are elsewhere, then use SRC to point to them.
KSRC ?= /lib/modules/$(shell uname -r)/build
SRC ?= $(KSRC)
# extract the version number.
# version.h can be in two different places.
# NOTE: version A.B.C translates to AXXYY, where XX and YY are the hex
# values of B and C
LIN_VER = $(shell V=linux/version.h; G=. ; \
[ -f $(KSRC)/include/$${V} ] || G=generated/uapi ;\
grep LINUX_VERSION_CODE $(KSRC)/include/$${G}/linux/version.h | \
awk '{printf "%03x%02x", $$3/256, $$3%256} ')
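# e.g. for 3.3.0, LINUX_VERSION_CODE is 197376 (0x30300), so the printf
# above yields "30300"; 3.8.0 likewise yields "30800"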
# produce a list of applicable patches for this version
PATCHES := $(shell \
cd $(PWD)/patches; ls diff--* | awk -v v=$(LIN_VER) -F -- \
'{ if ((!$$3 || $$3 <= v) && (!$$4 || v < $$4)) print $$0; }')
# source drivers to copy. Names derived from the patches
S_DRIVERS := $(shell \
cd $(PWD)/patches; ls diff--* | awk -v v=$(LIN_VER) -F -- \
'{ if ((!$$3 || $$3 <= v) && (!$$4 || v < $$4)) print $$2 }' )
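# e.g. (using the naming scheme described in the README)
# diff--r8169.c--20638--30300--ok splits on "--" into $2 = r8169.c
# (driver), $3 = 20638 (LOW) and $4 = 30300 (HIGH); a patch is kept
# when LOW <= LIN_VER < HIGH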
# actual drivers after copy and patch
DRIVERS = $(shell [ "$(PATCHES)" != "" ] && ls -dAp \
`echo $(PATCHES:diff--%=%) | sed -r 's/--[0-9a-f-]+//g'` 2> /dev/null)
# Compile v1000 (vhost porting to e1000) only if
# the LIN_VER >= 3.8.0, because we don't want to deal
# with backporting problems for v1000.
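# ($(sort) compares strings, but LIN_VER is always five hex digits, so
# this is equivalent to a numeric test for LIN_VER >= 30800, i.e. 3.8.0)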
ifeq ($(word 1, $(sort 30800 $(LIN_VER))), 30800)
CONFIG_V1000:=m
else
CONFIG_V1000:=n
endif
CONFIG_V1000:=n # forcibly disabled for now
obj-$(CONFIG_V1000) += vhost-port/
all: build
build: $(GET_DRIVERS)
$(MAKE) -C $(KSRC) M=$(PWD) CONFIG_NETMAP=m $(MOD_LIST) \
EXTRA_CFLAGS='$(EXTRA_CFLAGS)' \
O_DRIVERS="$(DRIVERS:%.c=%.o)" modules
@ls -l `find . -name \*.ko`
test:
@echo "version $(LIN_VER)"
@echo "patches $(PATCHES)"
@echo "drivers $(DRIVERS)"
clean:
-@ $(MAKE) -C $(KSRC) M=$(PWD) clean 2> /dev/null
-@ (rm -rf $(DRIVER_SRCS) *.orig *.rej *.ko *.o .*.d \
.tmp_versions *.mod.c modules.order \
Module.symvers .*.cmd get-drivers )
# the sources are not in this directory, so we need to specify the
# dependency explicitly
define remote_template
$$(obj)/$(1): $$(M)/../sys/dev/netmap/$(1:.o=.c)
$$(call cmd,cc_o_c)
$$(call cmd,modversions)
endef
$(foreach o,$(remoteobjs),$(eval $(call remote_template,$(o))))
#-- copy and patch initial files
# The location changes depending on the OS version, so ...
get-drivers:
-@( \
if [ -d "$(DRIVER_SRC)" ] ; then \
cd "$(DRIVER_SRC)"; s=.; what="`ls -dp *`" ; \
else \
cd $(SRC); [ -d source ] && cd source ; \
cd drivers/net; s=. ; \
[ -d ethernet ] && cd ethernet && s="$(_DRV_SUBDIRS)" ; \
what="$(S_DRIVERS)" ; \
fi ; \
echo "LIN_VER $(LIN_VER)" ; \
[ "$${what}" = "" ] && echo "-- NO DRIVERS --" && return; \
echo "---- Building from `pwd`"; \
echo "---- copying $${what} ---" ; \
what="$${what} cnic_if.h"; \
for i in $$s; do (cd $$i ; \
echo " From `pwd` :"; \
ls -ldp $${what} 2> /dev/null | sed 's/^/ /' ; \
cp -Rp $${what} $(PWD) 2>/dev/null ); \
done ; \
cd $(PWD) ; \
for i in $(PATCHES) ; \
do echo "** patch with $$i"; \
patch --posix --quiet --force -p1 < patches/$$i; \
done ; \
echo "Building the following drivers: $(S_DRIVERS)" )
@touch get-drivers
test3:
@echo "from $(PATCHES) -- to $(MYDRIVERS)"
@echo "Drivers is $(DRIVERS)"
@echo "Actually have `ls -d $(DRIVERS) 2> /dev/null`"
# compute the diffs for the original files
diffs:
@for i in `find . -name \*.orig`; do \
diff -urp $$i $${i%.orig} ; \
done
apps:
(cd ../examples; $(MAKE))
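# convenience target: "make +VAR" prints the value of the make variable
# VAR, e.g. "make +LIN_VER" or "make +PATCHES"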
+%:
@echo $($*)

View File

@@ -1,154 +0,0 @@
# $Id: README 10863 2012-04-11 17:10:39Z luigi $
NETMAP FOR LINUX
----------------
This directory contains a version of the "netmap" and "VALE" code for Linux.
Netmap is a BSD-licensed framework that supports line-rate direct packet
I/O even on 10GBit/s interfaces (14.88Mpps) with limited system load,
and includes a libpcap emulation library to port applications.
See
http://info.iet.unipi.it/~luigi/netmap/
for more details. There you can also find the latest versions
of the code and documentation as well as pre-built TinyCore
images based on linux 3.0.3 and containing the netmap modules
and some test applications.
This version supports r8169, ixgbe, igb, e1000, e1000e and forcedeth.
Netmap relies on a kernel module (netmap_lin.ko) and slightly modified
device drivers. Userspace programs can use the native API (documented
in netmap.4) or a libpcap emulation library.
The FreeBSD and Linux versions share the same codebase, which
is located in ../sys. For Linux we use some additional glue code
(bsd_glue.h).
Device drivers are taken directly from the Linux distributions,
and patched using the files in the patches/ directory.
Common driver modifications are in the .h files in this directory.
HOW TO BUILD THE CODE
---------------------
1. make sure you have kernel sources/headers matching your installed system
2. do the following
make clean; make KSRC=/usr/src/linux-kernel-source-or-headers
this produces ./netmap_lin.ko and other kernel modules.
3. to build sample applications, run
(cd ../examples; make )
(you will need the pthreads and libpcap-dev packages to build them)
If you want support for additional drivers, please have a look at
ixgbe_netmap_linux.h and the patches in patches/.
The patch files are named diff--DRIVER--LOW--HIGH--otherstuff,
where DRIVER is the name of the driver to patch, and LOW and HIGH are
the kernel versions to which the patch applies (LOW included, HIGH
excluded), so diff--r8169.c--20638--30300--ok applies from 2.6.38
(included) to 3.3.0 (excluded).
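As a minimal illustration (not part of the tree), here is how a kernel
version A.B.C maps to those LOW/HIGH numbers: the kernel encodes
LINUX_VERSION_CODE as (A<<16)|(B<<8)|C, and the Makefile prints it in
the same hex format used by the patch names:

    #include <stdio.h>

    int main(void)
    {
        /* 3.3.0 -> LINUX_VERSION_CODE 0x30300 (197376) */
        unsigned code = (3 << 16) | (3 << 8) | 0;
        /* the same formatting the Makefile uses to compute LIN_VER */
        printf("%03x%02x\n", code / 256, code % 256); /* prints "30300" */
        return 0;
    }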
HOW TO USE THE CODE
-------------------
REMEMBER
THIS IS EXPERIMENTAL CODE WHICH MAY CRASH YOUR SYSTEM.
USE IT AT YOUR OWN RISK.
Whether you built your own modules, or are using the prebuilt
TinyCore image, the following steps can be used for initial testing:
1. unload any modules for the network cards you want to use, e.g.
sudo rmmod ixgbe
sudo rmmod e1000
...
2. load netmap and device driver module
sudo insmod ./netmap_lin.ko
sudo insmod ./ixgbe/ixgbe.ko
sudo insmod ./e1000/e1000.ko
...
3. turn the interface(s) up
sudo ifconfig eth0 up # and same for others
4. Run test applications -- as an example, pkt-gen is a raw packet
sender/receiver which can do line rate on a 10G interface
# send about 500 million packets of 60 bytes each.
# wait 5s before starting, so the link can go up
sudo pkt-gen -i eth0 -f tx -n 500111222 -l 60 -w 5
# you should see about 14.88 Mpps
sudo pkt-gen -i eth0 -f rx # act as a receiver
COMMON PROBLEMS
----------------
* switching in/out of netmap mode causes the link to go down and up.
If your card is connected to a switch with spanning tree enabled,
the switch will likely MUTE THE LINK FOR 10 SECONDS while it is
detecting the new topology. Either disable the spanning tree on
the switch or use long pauses before sending data;
* Not all cards can do line rate, no matter how fast your software or
CPU is. Several have hardware limitations that prevent reaching the peak
speed, especially for small packet sizes. Examples:
- ixgbe cannot receive at line rate with packet sizes that are
not multiples of 64 (after CRC stripping).
This is especially evident with minimum-sized frames (-l 60)
- some of the low-end 'e1000' cards can send 1.2 - 1.3Mpps instead
of the theoretical maximum (1.488Mpps)
- the 'realtek' cards seem unable to send more than 450-500Kpps
even though they can receive at least 1.1Mpps
* if the link is not up when the packet generator starts, you will
see frequent messages about a link reset. While we work on a fix,
use the '-w' argument on the generator to specify a longer timeout
* the ixgbe driver (and perhaps others) is severely slowed down if the
remote party is sending flow control frames to slow down traffic.
If that happens, try using the ethtool command to disable flow control.
REVISION HISTORY
-----------------
20120813 - updated distribution using common code for FreeBSD and Linux,
and inclusion of drivers from the linux source tree
20120322 - fixed the 'igb' driver, now it can send and receive correctly
(the problem was in netmap_rx_irq() so it might have affected
other multiqueue cards).
Also tested the 'r8169' in transmit mode.
Added comments on switches and spanning tree.
20120217 - initial version. Only ixgbe, e1000 and e1000e are working.
Other drivers (igb, r8169, forcedeth) are supplied only as a
proof of concept.
DETAILS
--------
+ igb: on Linux 3.2 and above the igb driver moved to split buffers,
and netmap was not updated until the end of June 2013.
The symptom was an inability to receive short packets.
+ there are reports of ixgbe and igb being unable to read packets.
We have been unable to reproduce the problem.
- Ubuntu 12.04 LTS 3.5.0-25-generic. igb read problems ?
- 3.2.0-32-generic with 82598 not working
+ if_e1000_e uses regular descriptors up to at least 3.1;
3.2.32 is reported to use extended descriptors
(in my repo, updated at -r 11975)

View File

@@ -1,74 +0,0 @@
# See http://wiki.archlinux.org/index.php/VCS_PKGBUILD_Guidelines
# for more information on packaging from GIT sources.
# Maintainer: Vincenzo Maffione <v.maffione@gmail.com>
pkgname=netmap
pkgver=2.0
pkgrel=1
pkgdesc="Netmap is a framework for high speed network packet I/O."
arch=('any')
url="http://info.iet.unipi.it/~luigi/netmap"
license=('BSD')
groups=()
depends=('linux' 'glibc')
makedepends=('git' 'sed' 'gzip' 'linux-headers')
provides=()
conflicts=()
replaces=()
backup=()
options=()
install="netmap.install"
source=()
noextract=()
md5sums=() #generate with 'makepkg -g'
_gitroot="https://v.maffione@code.google.com/p/netmap/"
_gitname="netmap"
build() {
cd "$srcdir"
msg "Connecting to GIT server...."
if [[ -d "$_gitname" ]]; then
cd "$_gitname" && git pull origin
msg "The local files are updated."
else
git clone "$_gitroot" "$_gitname"
fi
msg "GIT checkout done or server timeout"
msg "Starting build..."
rm -rf "$srcdir/$_gitname-build"
git clone "$srcdir/$_gitname" "$srcdir/$_gitname-build"
cd "$srcdir/$_gitname-build"
# Build the netmap kernel module
cd "$srcdir/$_gitname-build/LINUX"
make || return 1
# Build pkt-gen and vale-ctl
cd "$srcdir/$_gitname-build/examples"
make pkt-gen vale-ctl || return 1
}
package() {
# Compute the version numbers of the running kernel
KVER1=$(uname -r)
KVER2=$(uname -r | sed 's/\.[0-9]\+-[0-9]\+//')
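# e.g. a hypothetical "3.14.4-1-ARCH" kernel gives KVER2="3.14-ARCH",
# matching the extramodules-3.14-ARCH directory used by Arch kernels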
# Install the netmap module into the extramodules-VERSION directory
mkdir -p "$pkgdir/lib/modules/extramodules-${KVER2}"
cp "$srcdir/$_gitname-build/LINUX/netmap_lin.ko" "$pkgdir/lib/modules/extramodules-${KVER2}"
# Install pkt-gen and vale-ctl into /usr/bin
mkdir -p "$pkgdir/usr/bin"
cp "$srcdir/$_gitname-build/examples/pkt-gen" "$pkgdir/usr/bin"
cp "$srcdir/$_gitname-build/examples/vale-ctl" "$pkgdir/usr/bin"
# Install the netmap man page
mkdir -p "$pkgdir/usr/share/man/man4"
cp "$srcdir/$_gitname-build/share/man/man4/netmap.4" "$pkgdir/usr/share/man/man4"
gzip "$pkgdir/usr/share/man/man4/netmap.4"
}
# vim:set ts=2 sw=2 et:

View File

@@ -1,20 +0,0 @@
post_common() {
depmod -a
}
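# depmod -a regenerates the module dependency files so that modprobe
# can find the freshly installed netmap_lin module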
## arg 1: the new package version
post_install() {
post_common
}
## arg 1: the new package version
## arg 2: the old package version
post_upgrade() {
post_common
}
## arg 1: the old package version
post_remove() {
post_common
}

View File

@@ -1,424 +0,0 @@
/*
* Copyright (C) 2012-2014 Luigi Rizzo - Universita` di Pisa
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* glue code to build the netmap bsd code under linux.
* Some of these tweaks are generic, some are specific for
* character device drivers and network code/device drivers.
*/
#ifndef _BSD_GLUE_H
#define _BSD_GLUE_H
/* a set of headers used in netmap */
#include <linux/version.h>
#include <linux/compiler.h> // ACCESS_ONCE()
#include <linux/if.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/poll.h>
#include <linux/netdevice.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/miscdevice.h>
#include <linux/etherdevice.h> // eth_type_trans
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/virtio.h> // virt_to_phys
#include <net/sock.h>
#include <linux/delay.h> // msleep
#include <linux/skbuff.h> // skb_copy_to_linear_data_offset
#include <linux/io.h> // virt_to_phys
#include <linux/hrtimer.h>
#define printf(fmt, arg...) printk(KERN_ERR fmt, ##arg)
#define KASSERT(a, b) BUG_ON(!(a))
/*----- support for compiling on older versions of linux -----*/
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
#define HRTIMER_MODE_REL HRTIMER_REL
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22)
#define skb_copy_from_linear_data_offset(skb, offset, to, copy) \
memcpy(to, (skb)->data + offset, copy)
#define skb_copy_to_linear_data_offset(skb, offset, from, copy) \
memcpy((skb)->data + offset, from, copy)
#define skb_copy_to_linear_data(skb, from, copy) \
memcpy((skb)->data, from, copy)
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
#define ACCESS_ONCE(x) (x)
#define uintptr_t unsigned long
#define skb_get_queue_mapping(m) (0)
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
/* Forward a hrtimer so it expires after the hrtimer's current now */
static inline u64 hrtimer_forward_now(struct hrtimer *timer,
ktime_t interval)
{
return hrtimer_forward(timer, timer->base->get_time(), interval);
}
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27)
typedef unsigned long phys_addr_t;
extern struct net init_net;
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) // XXX
#define netdev_ops hard_start_xmit
struct net_device_ops {
int (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev);
};
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) // XXX 31
#define netdev_tx_t int
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
#define usleep_range(a, b) msleep((a)+(b)+999)
#endif /* up to 2.6.35 */
/*----------- end of LINUX_VERSION_CODE dependencies ----------*/
/* Type redefinitions. XXX check them */
typedef void * bus_dma_tag_t;
typedef void * bus_dmamap_t;
typedef int bus_size_t;
typedef int bus_dma_segment_t;
typedef void * bus_addr_t;
#define vm_paddr_t phys_addr_t
/* XXX the 'off_t' on Linux corresponds to a 'long' */
#define vm_offset_t uint32_t
#define vm_ooffset_t unsigned long
struct thread;
/* endianness macros/functions */
#define le16toh le16_to_cpu
#define le32toh le32_to_cpu
#define le64toh le64_to_cpu
#define be16toh be16_to_cpu
#define be32toh be32_to_cpu
#define be64toh be64_to_cpu
#define htole32 cpu_to_le32
#define htole64 cpu_to_le64
#define htobe16 cpu_to_be16
#define htobe32 cpu_to_be32
#include <linux/jiffies.h>
#define time_second (jiffies_to_msecs(jiffies) / 1000U )
#define bzero(a, len) memset(a, 0, len)
/* Atomic variables. */
#define NM_ATOMIC_TEST_AND_SET(p) test_and_set_bit(0, (p))
#define NM_ATOMIC_CLEAR(p) clear_bit(0, (p))
#define NM_ATOMIC_SET(p, v) atomic_set(p, v)
#define NM_ATOMIC_INC(p) atomic_inc(p)
#define NM_ATOMIC_READ_AND_CLEAR(p) atomic_xchg(p, 0)
#define NM_ATOMIC_READ(p) atomic_read(p)
// XXX maybe implement it as a proper function somewhere
// it is important to set s->len before the copy.
#define m_devget(_buf, _len, _ofs, _dev, _fn) ( { \
struct sk_buff *s = netdev_alloc_skb(_dev, _len); \
if (s) { \
skb_put(s, _len); \
skb_copy_to_linear_data_offset(s, _ofs, _buf, _len); \
s->protocol = eth_type_trans(s, _dev); \
} \
s; } )
#define mbuf sk_buff
#define m_nextpkt next // chain of mbufs
#define m_freem(m) dev_kfree_skb_any(m) // free a sk_buff
#define GET_MBUF_REFCNT(m) NM_ATOMIC_READ(&((m)->users))
#define netmap_get_mbuf(size) alloc_skb(size, GFP_ATOMIC)
/*
* on tx we force skb->queue_mapping = ring_nr,
* but on rx it is the driver that sets the value,
* and it is 0 for no setting, ring_nr+1 otherwise.
*/
#define MBUF_TXQ(m) skb_get_queue_mapping(m)
#define MBUF_RXQ(m) (skb_rx_queue_recorded(m) ? skb_get_rx_queue(m) : 0)
#define SET_MBUF_DESTRUCTOR(m, f) m->destructor = (void *)&f
/* Magic number for sk_buff.priority field, used to take decisions in
* generic_ndo_start_xmit() and in linux_generic_rx_handler().
*/
#define NM_MAGIC_PRIORITY_TX 0xad86d310U
#define NM_MAGIC_PRIORITY_RX 0xad86d311U
/*
* m_copydata() copies from mbuf to buffer following the mbuf chain.
* skb_copy_bits() copies the skb headlen and all the fragments.
*/
#define m_copydata(m, o, l, b) skb_copy_bits(m, o, b, l)
#define copyin(_from, _to, _len) copy_from_user(_to, _from, _len)
/*
* struct ifnet is remapped into struct net_device on linux.
* ifnet has an if_softc field pointing to the device-specific struct
* (adapter).
* On linux the ifnet/net_device is at the beginning of the device-specific
* structure, so a pointer to the first field of the ifnet works.
* We don't use this in netmap, though.
*
* if_xname name device name
* if_capenable priv_flags
* we would use "features" but it is all taken.
* XXX check for conflict in flags use.
*
* In netmap we use if_pspare[0] to point to the netmap_adapter,
* in linux we have no spares so we overload ax25_ptr, and the detection
* for netmap-capable is some magic in the area pointed by that.
*/
#define WNA(_ifp) (_ifp)->ax25_ptr
#define ifnet net_device /* remap */
#define if_xname name /* field ifnet-> net_device */
#define if_capenable priv_flags /* IFCAP_NETMAP */
/* some other FreeBSD APIs */
struct net_device* ifunit_ref(const char *name);
void if_rele(struct net_device *ifp);
/* hook to send from user space */
netdev_tx_t linux_netmap_start_xmit(struct sk_buff *, struct net_device *);
/* prevent ring params change while in netmap mode */
int linux_netmap_set_ringparam(struct net_device *, struct ethtool_ringparam *);
#ifdef ETHTOOL_SCHANNELS
int linux_netmap_set_channels(struct net_device *, struct ethtool_channels *);
#endif
#define CURVNET_SET(x)
#define CURVNET_RESTORE(x)
#define refcount_acquire(_a) atomic_add(1, (atomic_t *)_a)
#define refcount_release(_a) atomic_dec_and_test((atomic_t *)_a)
/*
* We use spin_lock_irqsave() because we use the lock in the
* (hard) interrupt context.
*/
typedef struct {
spinlock_t sl;
ulong flags;
} safe_spinlock_t;
static inline void mtx_lock(safe_spinlock_t *m)
{
spin_lock_irqsave(&(m->sl), m->flags);
}
static inline void mtx_unlock(safe_spinlock_t *m)
{
ulong flags = ACCESS_ONCE(m->flags);
spin_unlock_irqrestore(&(m->sl), flags);
}
#define mtx_init(a, b, c, d) spin_lock_init(&((a)->sl))
#define mtx_destroy(a) // XXX spin_lock_destroy(a)
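/*
 * Usage sketch (illustrative, not from this file): a caller would do
 *	safe_spinlock_t core_lock;
 *	mtx_init(&core_lock, "nm_core", NULL, 0);
 *	mtx_lock(&core_lock);
 *	... hard-irq safe critical section ...
 *	mtx_unlock(&core_lock);
 * Note that the saved irq flags live inside the lock itself, so the
 * lock must not be acquired recursively.
 */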
/*
* XXX these must be changed, as we cannot sleep within the RCU.
* Must change to proper rwlock, and then can move the definitions
* into the main netmap.c file.
*/
#define BDG_RWLOCK_T struct rw_semaphore
#define BDG_RWINIT(b) init_rwsem(&(b)->bdg_lock)
#define BDG_WLOCK(b) down_write(&(b)->bdg_lock)
#define BDG_WUNLOCK(b) up_write(&(b)->bdg_lock)
#define BDG_RLOCK(b) down_read(&(b)->bdg_lock)
#define BDG_RUNLOCK(b) up_read(&(b)->bdg_lock)
#define BDG_RTRYLOCK(b) down_read_trylock(&(b)->bdg_lock)
#define BDG_SET_VAR(lval, p) ((lval) = (p))
#define BDG_GET_VAR(lval) (lval)
#define BDG_FREE(p) kfree(p)
/* use volatile to fix a probable compiler error on 2.6.25 */
#define malloc(_size, type, flags) \
({ volatile int _v = _size; kmalloc(_v, GFP_ATOMIC | __GFP_ZERO); })
#define free(a, t) kfree(a)
// XXX do we need __GFP_ZERO ?
// XXX do we need GFP_DMA for slots ?
// http://www.mjmwired.net/kernel/Documentation/DMA-API.txt
#ifndef ilog2 /* not in 2.6.18 */
static inline int ilog2(uint64_t n)
{
uint64_t k = 1ULL<<63;
int i;
for (i = 63; i >= 0 && !(n &k); i--, k >>=1)
;
return i;
}
#endif /* ilog2 */
#define contigmalloc(sz, ty, flags, a, b, pgsz, c) \
(char *) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, \
ilog2(roundup_pow_of_two((sz)/PAGE_SIZE)))
#define contigfree(va, sz, ty) free_pages((unsigned long)va, \
ilog2(roundup_pow_of_two(sz)/PAGE_SIZE))
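/* e.g. a 5-page request is rounded up to 8 pages, since
 * __get_free_pages() only hands out power-of-two page blocks
 * (order = ilog2(8) = 3) */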
#define vtophys virt_to_phys
/*--- selrecord and friends ---*/
/* wake_up() or wake_up_interruptible() ? */
#define OS_selwakeup(sw, pri) wake_up(sw)
#define selrecord(x, y) poll_wait((struct file *)x, y, pwait)
// #define knlist_destroy(x) // XXX todo
#define tsleep(a, b, c, t) msleep(10)
// #define wakeup(sw) // XXX double check
#define microtime do_gettimeofday // debugging
/*
* The following trick is to map a struct cdev into a struct miscdevice
* On FreeBSD cdev and cdevsw are two different objects.
*/
#define cdev miscdevice
#define cdevsw miscdevice
/*
* XXX to complete - the dmamap interface
*/
#define BUS_DMA_NOWAIT 0
#define bus_dmamap_load(_1, _2, _3, _4, _5, _6, _7)
#define bus_dmamap_unload(_1, _2)
typedef int (d_mmap_t)(struct file *f, struct vm_area_struct *vma);
typedef unsigned int (d_poll_t)(struct file * file, struct poll_table_struct *pwait);
/*
* make_dev will set an error and return the first argument.
* This relies on the availability of the 'error' local variable.
* For old linux systems that do not have devfs, generate a
* message in syslog so the sysadmin knows which command to run
* in order to create the /dev/netmap entry
*/
#define make_dev(_cdev, _zero, _uid, _gid, _perm, _name) \
({error = misc_register(_cdev); \
D("run mknod /dev/%s c %d %d # error %d", \
(_cdev)->name, MISC_MAJOR, (_cdev)->minor, error); \
_cdev; } )
#define destroy_dev(_cdev) misc_deregister(_cdev)
/*--- sysctl API ----*/
/*
* linux: sysctl are mapped into /sys/module/ipfw_mod parameters
* windows: they are emulated via get/setsockopt
*/
#define CTLFLAG_RD 1
#define CTLFLAG_RW 2
struct sysctl_oid;
struct sysctl_req;
#define SYSCTL_DECL(_1)
#define SYSCTL_OID(_1, _2, _3, _4, _5, _6, _7, _8)
#define SYSCTL_NODE(_1, _2, _3, _4, _5, _6)
#define _SYSCTL_BASE(_name, _var, _ty, _perm) \
module_param_named(_name, *(_var), _ty, \
( (_perm) == CTLFLAG_RD) ? 0444: 0644 )
/* XXX should implement this */
extern struct kernel_param_ops generic_sysctl_ops;
#define SYSCTL_PROC(_base, _oid, _name, _mode, _var, _val, _fn, _ty, _desc) \
module_param_cb(_name, &generic_sysctl_ops, _fn, \
( (_mode) & CTLFLAG_WR) ? 0644: 0444 )
/* for a string, _var is a preallocated buffer of size _varlen */
#define SYSCTL_STRING(_base, _oid, _name, _mode, _var, _varlen, _desc) \
module_param_string(_name, _var, _varlen, \
((_mode) == CTLFLAG_RD) ? 0444: 0644 )
#define SYSCTL_INT(_base, _oid, _name, _mode, _var, _val, _desc) \
_SYSCTL_BASE(_name, _var, int, _mode)
#define SYSCTL_LONG(_base, _oid, _name, _mode, _var, _val, _desc) \
_SYSCTL_BASE(_name, _var, long, _mode)
#define SYSCTL_ULONG(_base, _oid, _name, _mode, _var, _val, _desc) \
_SYSCTL_BASE(_name, _var, ulong, _mode)
#define SYSCTL_UINT(_base, _oid, _name, _mode, _var, _val, _desc) \
_SYSCTL_BASE(_name, _var, uint, _mode)
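/*
 * Example expansion (illustrative, assuming a netmap_verbose variable):
 *	SYSCTL_INT(_net, OID_AUTO, verbose, CTLFLAG_RW, &netmap_verbose, 0, "")
 * becomes
 *	module_param_named(verbose, netmap_verbose, int, 0644)
 * so the value shows up under /sys/module/<module>/parameters/verbose.
 */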
// #define TUNABLE_INT(_name, _ptr)
#define SYSCTL_VNET_PROC SYSCTL_PROC
#define SYSCTL_VNET_INT SYSCTL_INT
#define SYSCTL_HANDLER_ARGS \
struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req
int sysctl_handle_int(SYSCTL_HANDLER_ARGS);
int sysctl_handle_long(SYSCTL_HANDLER_ARGS);
#define MALLOC_DECLARE(a)
#define MALLOC_DEFINE(a, b, c)
#define devfs_get_cdevpriv(pp) \
({ *(struct netmap_priv_d **)pp = ((struct file *)td)->private_data; \
(*pp ? 0 : ENOENT); })
/* devfs_set_cdevpriv cannot fail on linux */
#define devfs_set_cdevpriv(p, fn) \
({ ((struct file *)td)->private_data = p; (p ? 0 : EINVAL); })
#define devfs_clear_cdevpriv() do { \
netmap_dtor(priv); ((struct file *)td)->private_data = 0; \
} while (0)
#endif /* _BSD_GLUE_H */

View File

@@ -1,104 +0,0 @@
diff --git a/e1000/e1000_main.c b/e1000/e1000_main.c
index bcd192c..5de7009 100644
--- a/e1000/e1000_main.c
+++ b/e1000/e1000_main.c
@@ -213,6 +213,10 @@ static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_e1000_netmap.h>
+#endif
+
/**
* e1000_init_module - Driver Registration Routine
*
@@ -375,6 +379,10 @@ static void e1000_configure(struct e1000_adapter *adapter)
e1000_configure_tx(adapter);
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
+#ifdef DEV_NETMAP
+ if (e1000_netmap_init_buffers(adapter))
+ return;
+#endif /* DEV_NETMAP */
/* call E1000_DESC_UNUSED which always leaves
* at least 1 descriptor unused to make sure
* next_to_use != next_to_clean */
@@ -402,6 +410,10 @@ int e1000_up(struct e1000_adapter *adapter)
netif_wake_queue(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif /* DEV_NETMAP */
+
/* fire a link change interrupt to start the watchdog */
ew32(ICS, E1000_ICS_LSC);
return 0;
@@ -485,6 +497,10 @@ void e1000_down(struct e1000_adapter *adapter)
ew32(RCTL, rctl & ~E1000_RCTL_EN);
/* flush and sleep below */
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
netif_tx_disable(netdev);
/* disable transmits in the hardware */
@@ -1035,6 +1051,10 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
adapter->wol = adapter->eeprom_wol;
device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+#ifdef DEV_NETMAP
+ e1000_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
/* print bus type/speed/width info */
DPRINTK(PROBE, INFO, "(PCI%s:%s:%s) ",
((hw->bus_type == e1000_bus_type_pcix) ? "-X" : ""),
@@ -1113,6 +1133,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev)
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
+
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
iounmap(hw->hw_addr);
if (hw->flash_address)
@@ -1291,6 +1315,10 @@ static int e1000_open(struct net_device *netdev)
netif_start_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif
+
/* fire a link status change interrupt to start the watchdog */
ew32(ICS, E1000_ICS_LSC);
@@ -3429,6 +3457,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
unsigned int count = 0;
unsigned int total_tx_bytes=0, total_tx_packets=0;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(netdev, 0))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC(*tx_ring, eop);
@@ -3795,6 +3827,11 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
bool cleaned = false;
unsigned int total_rx_bytes=0, total_rx_packets=0;
+#ifdef DEV_NETMAP
+ ND("calling netmap_rx_irq");
+ if (netmap_rx_irq(netdev, 0, work_done))
+ return 1; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC(*rx_ring, i);
buffer_info = &rx_ring->buffer_info[i];

View File

@@ -1,91 +0,0 @@
diff --git a/e1000e/netdev.c b/e1000e/netdev.c
index fad8f9e..50f74e2 100644
--- a/e1000e/netdev.c
+++ b/e1000e/netdev.c
@@ -87,6 +87,10 @@ static int e1000_desc_unused(struct e1000_ring *ring)
return ring->count + ring->next_to_clean - ring->next_to_use - 1;
}
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_e1000e_netmap.h>
+#endif
+
/**
* e1000_receive_skb - helper function to handle Rx indications
* @adapter: board private structure
@@ -446,6 +450,10 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
bool cleaned = 0;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(netdev, 0, work_done))
+ return 1; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC(*rx_ring, i);
buffer_info = &rx_ring->buffer_info[i];
@@ -624,6 +632,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
unsigned int count = 0;
unsigned int total_tx_bytes = 0, total_tx_packets = 0;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(netdev, 0))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC(*tx_ring, eop);
@@ -2632,6 +2644,10 @@ static void e1000_configure(struct e1000_adapter *adapter)
e1000_configure_tx(adapter);
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
+#ifdef DEV_NETMAP
+ if (e1000e_netmap_init_buffers(adapter))
+ return;
+#endif /* DEV_NETMAP */
adapter->alloc_rx_buf(adapter, e1000_desc_unused(adapter->rx_ring));
}
@@ -2892,6 +2908,10 @@ void e1000e_down(struct e1000_adapter *adapter)
netif_stop_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
/* disable transmits in the hardware */
tctl = er32(TCTL);
tctl &= ~E1000_TCTL_EN;
@@ -3174,6 +3194,10 @@ static int e1000_open(struct net_device *netdev)
netif_start_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
/* fire a link status change interrupt to start the watchdog */
ew32(ICS, E1000_ICS_LSC);
@@ -5227,6 +5251,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
if (err)
goto err_register;
+#ifdef DEV_NETMAP
+ e1000_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
@@ -5300,6 +5327,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev)
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
iounmap(adapter->hw.hw_addr);
if (adapter->hw.flash_address)
iounmap(adapter->hw.flash_address);

View File

@@ -1,91 +0,0 @@
diff --git a/e1000e/netdev.c b/e1000e/netdev.c
index 57a7e41..d8bc988 100644
--- a/e1000e/netdev.c
+++ b/e1000e/netdev.c
@@ -435,6 +435,10 @@ static int e1000_desc_unused(struct e1000_ring *ring)
return ring->count + ring->next_to_clean - ring->next_to_use - 1;
}
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_e1000e_netmap.h>
+#endif
+
/**
* e1000_receive_skb - helper function to handle Rx indications
* @adapter: board private structure
@@ -763,6 +767,10 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
bool cleaned = 0;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(netdev, 0, work_done))
+ return 1; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC(*rx_ring, i);
buffer_info = &rx_ring->buffer_info[i];
@@ -977,6 +985,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
unsigned int count = 0;
unsigned int total_tx_bytes = 0, total_tx_packets = 0;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(netdev, 0))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC(*tx_ring, eop);
@@ -3001,6 +3013,10 @@ static void e1000_configure(struct e1000_adapter *adapter)
e1000_configure_tx(adapter);
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
+#ifdef DEV_NETMAP
+ if (e1000e_netmap_init_buffers(adapter))
+ return;
+#endif /* DEV_NETMAP */
adapter->alloc_rx_buf(adapter, e1000_desc_unused(adapter->rx_ring));
}
@@ -3240,6 +3256,10 @@ void e1000e_down(struct e1000_adapter *adapter)
netif_stop_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
/* disable transmits in the hardware */
tctl = er32(TCTL);
tctl &= ~E1000_TCTL_EN;
@@ -3532,6 +3552,10 @@ static int e1000_open(struct net_device *netdev)
netif_start_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
adapter->idle_check = true;
pm_runtime_put(&pdev->dev);
@@ -5716,6 +5740,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
if (err)
goto err_register;
+#ifdef DEV_NETMAP
+ e1000_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
@@ -5813,6 +5840,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev)
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
iounmap(adapter->hw.hw_addr);
if (adapter->hw.flash_address)
iounmap(adapter->hw.flash_address);

View File

@@ -1,91 +0,0 @@
diff --git a/e1000e/netdev.c b/e1000e/netdev.c
index 2198e61..caf2767 100644
--- a/e1000e/netdev.c
+++ b/e1000e/netdev.c
@@ -452,6 +452,10 @@ static int e1000_desc_unused(struct e1000_ring *ring)
return ring->count + ring->next_to_clean - ring->next_to_use - 1;
}
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_e1000e_netmap.h>
+#endif
+
/**
* e1000_receive_skb - helper function to handle Rx indications
* @adapter: board private structure
@@ -849,6 +853,10 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
bool cleaned = 0;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(netdev, 0, work_done))
+ return 1; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC(*rx_ring, i);
buffer_info = &rx_ring->buffer_info[i];
@@ -1066,6 +1074,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
unsigned int count = 0;
unsigned int total_tx_bytes = 0, total_tx_packets = 0;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(netdev, 0))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC(*tx_ring, eop);
@@ -3177,6 +3189,10 @@ static void e1000_configure(struct e1000_adapter *adapter)
e1000_configure_tx(adapter);
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
+#ifdef DEV_NETMAP
+ if (e1000e_netmap_init_buffers(adapter))
+ return;
+#endif /* DEV_NETMAP */
adapter->alloc_rx_buf(adapter, e1000_desc_unused(adapter->rx_ring),
GFP_KERNEL);
}
@@ -3468,6 +3484,10 @@ void e1000e_down(struct e1000_adapter *adapter)
netif_stop_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
/* disable transmits in the hardware */
tctl = er32(TCTL);
tctl &= ~E1000_TCTL_EN;
@@ -3755,6 +3775,10 @@ static int e1000_open(struct net_device *netdev)
netif_start_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
adapter->idle_check = true;
pm_runtime_put(&pdev->dev);
@@ -6147,6 +6171,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
if (err)
goto err_register;
+#ifdef DEV_NETMAP
+ e1000_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
@@ -6234,6 +6261,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev)
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
iounmap(adapter->hw.hw_addr);
if (adapter->hw.flash_address)
iounmap(adapter->hw.flash_address);

View File

@@ -1,91 +0,0 @@
diff --git a/e1000e/netdev.c b/e1000e/netdev.c
index 9520a6a..f6f2df6 100644
--- a/e1000e/netdev.c
+++ b/e1000e/netdev.c
@@ -467,6 +467,10 @@ static int e1000_desc_unused(struct e1000_ring *ring)
return ring->count + ring->next_to_clean - ring->next_to_use - 1;
}
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_e1000e_netmap.h>
+#endif
+
/**
* e1000_receive_skb - helper function to handle Rx indications
* @adapter: board private structure
@@ -875,6 +879,10 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
bool cleaned = false;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(netdev, 0, work_done))
+ return 1; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
@@ -1129,6 +1137,10 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring)
unsigned int total_tx_bytes = 0, total_tx_packets = 0;
unsigned int bytes_compl = 0, pkts_compl = 0;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(netdev, 0))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC(*tx_ring, eop);
@@ -3358,6 +3370,10 @@ static void e1000_configure(struct e1000_adapter *adapter)
e1000e_setup_rss_hash(adapter);
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
+#ifdef DEV_NETMAP
+ if (e1000e_netmap_init_buffers(adapter))
+ return;
+#endif /* DEV_NETMAP */
adapter->alloc_rx_buf(rx_ring, e1000_desc_unused(rx_ring), GFP_KERNEL);
}
@@ -3657,6 +3673,10 @@ void e1000e_down(struct e1000_adapter *adapter)
netif_stop_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
/* disable transmits in the hardware */
tctl = er32(TCTL);
tctl &= ~E1000_TCTL_EN;
@@ -3946,6 +3966,10 @@ static int e1000_open(struct net_device *netdev)
adapter->tx_hang_recheck = false;
netif_start_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
adapter->idle_check = true;
pm_runtime_put(&pdev->dev);
@@ -6417,6 +6441,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
if (err)
goto err_register;
+#ifdef DEV_NETMAP
+ e1000_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
@@ -6504,6 +6531,10 @@ static void __devexit e1000_remove(struct pci_dev *pdev)
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
iounmap(adapter->hw.hw_addr);
if (adapter->hw.flash_address)
iounmap(adapter->hw.flash_address);

View File

@@ -1,91 +0,0 @@
diff --git a/e1000e/netdev.c b/e1000e/netdev.c
index 7e615e2..f9d8a88 100644
--- a/e1000e/netdev.c
+++ b/e1000e/netdev.c
@@ -473,6 +473,10 @@ static int e1000_desc_unused(struct e1000_ring *ring)
return ring->count + ring->next_to_clean - ring->next_to_use - 1;
}
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_e1000e_netmap.h>
+#endif
+
/**
* e1000e_systim_to_hwtstamp - convert system time value to hw time stamp
* @adapter: board private structure
@@ -914,6 +918,10 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
bool cleaned = false;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(netdev, 0, work_done))
+ return 1; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
@@ -1203,6 +1211,10 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring)
unsigned int total_tx_bytes = 0, total_tx_packets = 0;
unsigned int bytes_compl = 0, pkts_compl = 0;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(netdev, 0))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC(*tx_ring, eop);
@@ -3685,6 +3697,10 @@ static void e1000_configure(struct e1000_adapter *adapter)
e1000e_setup_rss_hash(adapter);
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
+#ifdef DEV_NETMAP
+ if (e1000e_netmap_init_buffers(adapter))
+ return;
+#endif /* DEV_NETMAP */
adapter->alloc_rx_buf(rx_ring, e1000_desc_unused(rx_ring), GFP_KERNEL);
}
@@ -3988,6 +4004,10 @@ void e1000e_down(struct e1000_adapter *adapter)
netif_stop_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
/* disable transmits in the hardware */
tctl = er32(TCTL);
tctl &= ~E1000_TCTL_EN;
@@ -4307,6 +4327,10 @@ static int e1000_open(struct net_device *netdev)
adapter->tx_hang_recheck = false;
netif_start_queue(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
adapter->idle_check = true;
hw->mac.get_link_status = true;
pm_runtime_put(&pdev->dev);
@@ -6768,6 +6792,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_register;
+#ifdef DEV_NETMAP
+ e1000_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
@@ -6866,6 +6893,10 @@ static void e1000_remove(struct pci_dev *pdev)
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
iounmap(adapter->hw.hw_addr);
if (adapter->hw.flash_address)
iounmap(adapter->hw.flash_address);

View File

@@ -1,76 +0,0 @@
diff --git a/forcedeth.c b/forcedeth.c
index 9c0b1ba..b081d6b 100644
--- a/forcedeth.c
+++ b/forcedeth.c
@@ -1865,12 +1865,25 @@ static void nv_init_tx(struct net_device *dev)
}
}
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/* we need a few forward declarations */
+static void nv_drain_rxtx(struct net_device *dev);
+static int nv_init_ring(struct net_device *dev);
+#include <forcedeth_netmap.h>
+#endif
+
static int nv_init_ring(struct net_device *dev)
{
struct fe_priv *np = netdev_priv(dev);
nv_init_tx(dev);
nv_init_rx(dev);
+#ifdef DEV_NETMAP
+ forcedeth_netmap_tx_init(np);
+ if (forcedeth_netmap_rx_init(np))
+ return 0; /* success */
+#endif /* DEV_NETMAP */
+
if (!nv_optimized(np))
return nv_alloc_rx(dev);
@@ -3386,6 +3399,11 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
int i;
unsigned long flags;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(dev, 0))
+ return IRQ_HANDLED;
+#endif /* DEV_NETMAP */
+
for (i = 0;; i++) {
events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_TX_ALL;
writel(NVREG_IRQ_TX_ALL, base + NvRegMSIXIrqStatus);
@@ -3497,6 +3515,11 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data)
int i;
unsigned long flags;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(dev, 0, &i))
+ return IRQ_HANDLED;
+#endif /* DEV_NETMAP */
+
for (i = 0;; i++) {
events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_RX_ALL;
writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus);
@@ -5645,6 +5668,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
goto out_error;
}
+#ifdef DEV_NETMAP
+ forcedeth_netmap_attach(np);
+#endif /* DEV_NETMAP */
+
netif_carrier_off(dev);
dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
@@ -5728,6 +5755,10 @@ static void __devexit nv_remove(struct pci_dev *pci_dev)
unregister_netdev(dev);
+#ifdef DEV_NETMAP
+ netmap_detach(dev);
+#endif /* DEV_NETMAP */
+
nv_restore_mac_addr(pci_dev);
/* restore any phy related changes */

View File

@@ -1,37 +0,0 @@
diff --git a/igb/igb_main.c b/igb/igb_main.c
index c881347..77b3fda 100644
--- a/igb/igb_main.c
+++ b/igb/igb_main.c
@@ -1144,6 +1144,10 @@ int igb_up(struct igb_adapter *adapter)
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
@@ -1167,6 +1171,10 @@ void igb_down(struct igb_adapter *adapter)
wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
/* flush and sleep below */
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
netif_tx_stop_all_queues(netdev);
/* disable transmits in the hardware */
@@ -2018,6 +2026,10 @@ static int igb_open(struct net_device *netdev)
netif_tx_start_all_queues(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);

View File

@@ -1,115 +0,0 @@
diff --git a/igb/igb_main.c b/igb/igb_main.c
index cea37e0..70777e4 100644
--- a/igb/igb_main.c
+++ b/igb/igb_main.c
@@ -201,6 +201,10 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_igb_netmap.h>
+#endif
+
struct igb_reg_info {
u32 ofs;
char *name;
@@ -1478,6 +1482,10 @@ int igb_up(struct igb_adapter *adapter)
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
@@ -1501,6 +1509,10 @@ void igb_down(struct igb_adapter *adapter)
wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
/* flush and sleep below */
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
netif_tx_stop_all_queues(netdev);
/* disable transmits in the hardware */
@@ -1963,6 +1975,10 @@ static int __devinit igb_probe(struct pci_dev *pdev,
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
+#ifdef DEV_NETMAP
+ igb_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
#ifdef CONFIG_IGB_DCA
if (dca_add_requester(&pdev->dev) == 0) {
adapter->flags |= IGB_FLAG_DCA_ENABLED;
@@ -2072,6 +2088,10 @@ static void __devexit igb_remove(struct pci_dev *pdev)
dev_info(&pdev->dev, "IOV Disabled\n");
}
#endif
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
iounmap(hw->hw_addr);
if (hw->flash_address)
@@ -2366,6 +2386,10 @@ static int igb_open(struct net_device *netdev)
netif_tx_start_all_queues(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
@@ -2545,6 +2569,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
wr32(E1000_TXDCTL(reg_idx), txdctl);
+#ifdef DEV_NETMAP
+ igb_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
/**
@@ -5338,6 +5365,11 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
unsigned int i, eop, count = 0;
bool cleaned = false;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(netdev, tx_ring->queue_index))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
+
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
@@ -5540,6 +5572,11 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
u16 length;
u16 vlan_tag;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(netdev, rx_ring->queue_index, work_done))
+ return 1;
+#endif /* DEV_NETMAP */
+
i = rx_ring->next_to_clean;
buffer_info = &rx_ring->buffer_info[i];
rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
@@ -5668,6 +5705,10 @@ void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
unsigned int i;
int bufsz;
+#ifdef DEV_NETMAP
+ if (igb_netmap_configure_rx_ring(rx_ring))
+ return;
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_use;
buffer_info = &rx_ring->buffer_info[i];

View File

@@ -1,136 +0,0 @@
diff --git a/igb/igb_main.c b/igb/igb_main.c
index ced5444..fb7c766 100644
--- a/igb/igb_main.c
+++ b/igb/igb_main.c
@@ -225,6 +225,10 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_igb_netmap.h>
+#endif
+
struct igb_reg_info {
u32 ofs;
char *name;
@@ -1551,6 +1555,10 @@ int igb_up(struct igb_adapter *adapter)
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
@@ -1584,6 +1592,10 @@ void igb_down(struct igb_adapter *adapter)
wrfl();
msleep(10);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
for (i = 0; i < adapter->num_q_vectors; i++)
napi_disable(&(adapter->q_vector[i]->napi));
@@ -2073,6 +2085,10 @@ static int __devinit igb_probe(struct pci_dev *pdev,
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
+#ifdef DEV_NETMAP
+ igb_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
#ifdef CONFIG_IGB_DCA
if (dca_add_requester(&pdev->dev) == 0) {
adapter->flags |= IGB_FLAG_DCA_ENABLED;
@@ -2199,6 +2215,10 @@ static void __devexit igb_remove(struct pci_dev *pdev)
dev_info(&pdev->dev, "IOV Disabled\n");
}
#endif
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
iounmap(hw->hw_addr);
if (hw->flash_address)
@@ -2529,6 +2549,10 @@ static int igb_open(struct net_device *netdev)
netif_tx_start_all_queues(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
@@ -2711,6 +2735,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
wr32(E1000_TXDCTL(reg_idx), txdctl);
+#ifdef DEV_NETMAP
+ igb_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
/**
@@ -3088,6 +3115,19 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
/* Only set Drop Enable if we are supporting multiple queues */
if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
srrctl |= E1000_SRRCTL_DROP_EN;
+#ifdef DEV_NETMAP
+ {
+ /* The driver uses split buffers, which are not
+ * supported in netmap mode */
+ struct ifnet *ifp = adapter->netdev;
+ struct netmap_adapter *na = NA(ifp);
+ if (na && ifp->if_capenable & IFCAP_NETMAP) {
+ srrctl &= ~(7 << 25); /* clear descriptor type */
+ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
+ /* XXX we should set tail here */
+ }
+ }
+#endif
wr32(E1000_SRRCTL(reg_idx), srrctl);
@@ -5705,6 +5745,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
if (test_bit(__IGB_DOWN, &adapter->state))
return true;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(tx_ring->netdev, tx_ring->queue_index))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IGB_TX_DESC(tx_ring, i);
@@ -5980,6 +6024,12 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
u16 cleaned_count = igb_desc_unused(rx_ring);
u16 i = rx_ring->next_to_clean;
+#ifdef DEV_NETMAP
+ int dummy = 1; // select rx irq handling
+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy))
+ return 1;
+#endif /* DEV_NETMAP */
+
rx_desc = IGB_RX_DESC(rx_ring, i);
while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
@@ -6170,6 +6220,11 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
struct igb_rx_buffer *bi;
u16 i = rx_ring->next_to_use;
+#ifdef DEV_NETMAP
+ if (igb_netmap_configure_rx_ring(rx_ring))
+ return;
+#endif /* DEV_NETMAP */
+
rx_desc = IGB_RX_DESC(rx_ring, i);
bi = &rx_ring->rx_buffer_info[i];
i -= rx_ring->count;

View File

@@ -1,136 +0,0 @@
diff --git a/igb/igb_main.c b/igb/igb_main.c
index 94be6c3..294051b 100644
--- a/igb/igb_main.c
+++ b/igb/igb_main.c
@@ -236,6 +236,10 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_igb_netmap.h>
+#endif
+
struct igb_reg_info {
u32 ofs;
char *name;
@@ -1557,6 +1561,10 @@ int igb_up(struct igb_adapter *adapter)
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
@@ -1590,6 +1598,10 @@ void igb_down(struct igb_adapter *adapter)
wrfl();
msleep(10);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
for (i = 0; i < adapter->num_q_vectors; i++)
napi_disable(&(adapter->q_vector[i]->napi));
@@ -2081,6 +2093,10 @@ static int __devinit igb_probe(struct pci_dev *pdev,
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
+#ifdef DEV_NETMAP
+ igb_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
#ifdef CONFIG_IGB_DCA
if (dca_add_requester(&pdev->dev) == 0) {
adapter->flags |= IGB_FLAG_DCA_ENABLED;
@@ -2211,6 +2227,10 @@ static void __devexit igb_remove(struct pci_dev *pdev)
dev_info(&pdev->dev, "IOV Disabled\n");
}
#endif
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
iounmap(hw->hw_addr);
if (hw->flash_address)
@@ -2547,6 +2567,10 @@ static int __igb_open(struct net_device *netdev, bool resuming)
netif_tx_start_all_queues(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
if (!resuming)
pm_runtime_put(&pdev->dev);
@@ -2750,6 +2774,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
wr32(E1000_TXDCTL(reg_idx), txdctl);
+#ifdef DEV_NETMAP
+ igb_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
/**
@@ -3127,6 +3154,19 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
/* Only set Drop Enable if we are supporting multiple queues */
if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
srrctl |= E1000_SRRCTL_DROP_EN;
+#ifdef DEV_NETMAP
+ {
+ /* The driver uses split buffers, which are not
+ * supported in netmap mode */
+ struct ifnet *ifp = adapter->netdev;
+ struct netmap_adapter *na = NA(ifp);
+ if (na && ifp->if_capenable & IFCAP_NETMAP) {
+ srrctl &= ~(7 << 25); /* clear descriptor type */
+ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
+ /* XXX we should set tail here */
+ }
+ }
+#endif
wr32(E1000_SRRCTL(reg_idx), srrctl);
@@ -5753,6 +5793,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
if (test_bit(__IGB_DOWN, &adapter->state))
return true;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(tx_ring->netdev, tx_ring->queue_index))
+ return 1; /* cleaned ok */
+#endif /* DEV_NETMAP */
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IGB_TX_DESC(tx_ring, i);
@@ -6030,6 +6074,12 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
u16 cleaned_count = igb_desc_unused(rx_ring);
u16 i = rx_ring->next_to_clean;
+#ifdef DEV_NETMAP
+ int dummy = 1; // select rx irq handling
+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy))
+ return 1;
+#endif /* DEV_NETMAP */
+
rx_desc = IGB_RX_DESC(rx_ring, i);
while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
@@ -6220,6 +6270,11 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
struct igb_rx_buffer *bi;
u16 i = rx_ring->next_to_use;
+#ifdef DEV_NETMAP
+ if (igb_netmap_configure_rx_ring(rx_ring))
+ return;
+#endif /* DEV_NETMAP */
+
rx_desc = IGB_RX_DESC(rx_ring, i);
bi = &rx_ring->rx_buffer_info[i];
i -= rx_ring->count;

View File

@@ -1,114 +0,0 @@
diff --git a/igb/igb_main.c b/igb/igb_main.c
index 31cfe2e..8439bc6 100644
--- a/igb/igb_main.c
+++ b/igb/igb_main.c
@@ -247,6 +247,10 @@ static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_igb_netmap.h>
+#endif
+
struct igb_reg_info {
u32 ofs;
char *name;
@@ -1520,6 +1524,10 @@ int igb_up(struct igb_adapter *adapter)
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
@@ -1553,6 +1561,10 @@ void igb_down(struct igb_adapter *adapter)
wrfl();
msleep(10);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
for (i = 0; i < adapter->num_q_vectors; i++)
napi_disable(&(adapter->q_vector[i]->napi));
@@ -2127,6 +2139,10 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
+#ifdef DEV_NETMAP
+ igb_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
#ifdef CONFIG_IGB_DCA
if (dca_add_requester(&pdev->dev) == 0) {
adapter->flags |= IGB_FLAG_DCA_ENABLED;
@@ -2233,6 +2249,10 @@ static void igb_remove(struct pci_dev *pdev)
wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
}
#endif
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
/* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant. */
@@ -2553,6 +2573,10 @@ static int __igb_open(struct net_device *netdev, bool resuming)
netif_tx_start_all_queues(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
if (!resuming)
pm_runtime_put(&pdev->dev);
@@ -2746,6 +2770,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
wr32(E1000_TXDCTL(reg_idx), txdctl);
+#ifdef DEV_NETMAP
+ igb_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
/**
@@ -5690,6 +5717,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
if (test_bit(__IGB_DOWN, &adapter->state))
return true;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(tx_ring->netdev, tx_ring->queue_index))
+ return true; /* cleaned ok */
+#endif /* DEV_NETMAP */
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IGB_TX_DESC(tx_ring, i);
@@ -6349,6 +6380,10 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
unsigned int total_bytes = 0, total_packets = 0;
u16 cleaned_count = igb_desc_unused(rx_ring);
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &total_packets))
+ return true;
+#endif /* DEV_NETMAP */
do {
union e1000_adv_rx_desc *rx_desc;
@@ -6461,6 +6496,11 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
struct igb_rx_buffer *bi;
u16 i = rx_ring->next_to_use;
+#ifdef DEV_NETMAP
+ if (igb_netmap_configure_rx_ring(rx_ring))
+ return;
+#endif /* DEV_NETMAP */
+
/* nothing to do */
if (!cleaned_count)
return;

View File

@ -1,113 +0,0 @@
diff --git a/igb/igb_main.c b/igb/igb_main.c
index c1d72c0..9815796 100644
--- a/igb/igb_main.c
+++ b/igb/igb_main.c
@@ -255,6 +255,10 @@ static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_igb_netmap.h>
+#endif
+
struct igb_reg_info {
u32 ofs;
char *name;
@@ -1633,6 +1637,10 @@ int igb_up(struct igb_adapter *adapter)
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif /* DEV_NETMAP */
+
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
@@ -1674,6 +1682,9 @@ void igb_down(struct igb_adapter *adapter)
napi_disable(&(adapter->q_vector[i]->napi));
}
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif /* DEV_NETMAP */
del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
@@ -2295,6 +2306,10 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
+#ifdef DEV_NETMAP
+ igb_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
#ifdef CONFIG_IGB_DCA
if (dca_add_requester(&pdev->dev) == 0) {
adapter->flags |= IGB_FLAG_DCA_ENABLED;
@@ -2536,6 +2551,10 @@ static void igb_remove(struct pci_dev *pdev)
wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
}
#endif
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
/* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant.
@@ -2814,6 +2833,10 @@ static int __igb_open(struct net_device *netdev, bool resuming)
netif_tx_start_all_queues(netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(netdev);
+#endif /* DEV_NETMAP */
+
if (!resuming)
pm_runtime_put(&pdev->dev);
@@ -3007,6 +3030,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
wr32(E1000_TXDCTL(reg_idx), txdctl);
+#ifdef DEV_NETMAP
+ igb_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
/**
@@ -5991,6 +6017,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
if (test_bit(__IGB_DOWN, &adapter->state))
return true;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(tx_ring->netdev, tx_ring->queue_index))
+ return true; /* cleaned ok */
+#endif /* DEV_NETMAP */
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IGB_TX_DESC(tx_ring, i);
@@ -6650,6 +6680,10 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
unsigned int total_bytes = 0, total_packets = 0;
u16 cleaned_count = igb_desc_unused(rx_ring);
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &total_packets))
+ return true;
+#endif /* DEV_NETMAP */
do {
union e1000_adv_rx_desc *rx_desc;
@@ -6767,6 +6801,11 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
struct igb_rx_buffer *bi;
u16 i = rx_ring->next_to_use;
+#ifdef DEV_NETMAP
+ if (igb_netmap_configure_rx_ring(rx_ring))
+ return;
+#endif /* DEV_NETMAP */
+
/* nothing to do */
if (!cleaned_count)
return;

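All three igb variants install the same small set of hooks: igb_netmap_attach() in probe, netmap_detach() in remove, ring enable/disable around up/down and open, the per-ring configure calls, and the IRQ guards; only the hunk offsets differ across kernel versions. The header supplying the hooks, if_igb_netmap.h, is not part of this commit, so here is a hedged sketch of what its attach routine looked like under the two-argument netmap_attach() API of that era; treat the field names as assumptions rather than the verbatim header:

/* Hedged reconstruction, not the actual if_igb_netmap.h. */
static void igb_netmap_attach(struct igb_adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = adapter->netdev;
	na.num_tx_desc = adapter->tx_ring_count;
	na.num_rx_desc = adapter->rx_ring_count;
	na.nm_register = igb_netmap_reg;	/* netmap mode on/off callback */
	na.nm_txsync = igb_netmap_txsync;	/* TX ring synchronization */
	na.nm_rxsync = igb_netmap_rxsync;	/* RX ring synchronization */
	netmap_attach(&na, adapter->num_tx_queues);
}
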
View File

@ -1,113 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index eee0b29..70581eb 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -214,6 +214,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -740,6 +756,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
unsigned int i, eop, count = 0;
unsigned int total_bytes = 0, total_packets = 0;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->tx_buffer_info[i].next_to_watch;
eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
@@ -1185,6 +1211,13 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
int ddp_bytes = 0;
#endif /* IXGBE_FCOE */
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, work_done))
+ return;
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i);
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
@@ -2519,6 +2552,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -2833,6 +2869,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(adapter, ring, IXGBE_DESC_UNUSED(ring));
}
@@ -3614,6 +3654,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -3863,6 +3907,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
/* Cleanup the affinity_hint CPU mask memory and callback */
for (i = 0; i < num_q_vectors; i++) {
struct ixgbe_q_vector *q_vector = adapter->q_vector[i];
@@ -7048,6 +7096,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
e_dev_info("Intel(R) 10 Gigabit Network Connection\n");
cards_found++;
+
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:

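Every clean_tx_irq/clean_rx_irq hunk in the ixgbe patches relies on the same contract: netmap_tx_irq() and netmap_rx_irq() return nonzero when the ring is in netmap mode, after waking any client sleeping on it, and (in the implementation of the time) the RX variant stores 1 through its work_done pointer so NAPI accounting still records progress. A sketch of the pattern in isolation; example_poll() is hypothetical, the netmap call is the real entry point:

/* Sketch of the interception contract shared by all the hunks. */
static int example_poll(struct net_device *dev, int queue, int budget)
{
	int work_done = 0;

	if (netmap_rx_irq(dev, queue, &work_done)) {
		/* The ring is in netmap mode: the woken client will
		 * consume the descriptors itself, so the driver must
		 * not touch them here. */
		return work_done;	/* small value, below budget */
	}
	/* ... normal NAPI receive processing up to budget ... */
	return work_done;
}
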
View File

@ -1,113 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index 30f9ccf..60c0252 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -221,6 +221,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -826,6 +842,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
unsigned int total_bytes = 0, total_packets = 0;
u16 i, eop, count = 0;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->tx_buffer_info[i].next_to_watch;
eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
@@ -1308,6 +1334,13 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
u16 cleaned_count = 0;
bool pkt_is_rsc = false;
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, work_done))
+ return;
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i);
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
@@ -2730,6 +2763,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3094,6 +3130,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(ring, IXGBE_DESC_UNUSED(ring));
}
@@ -3882,6 +3922,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4121,6 +4165,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
/* Cleanup the affinity_hint CPU mask memory and callback */
for (i = 0; i < num_q_vectors; i++) {
struct ixgbe_q_vector *q_vector = adapter->q_vector[i];
@@ -7450,6 +7498,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
e_dev_info("Intel(R) 10 Gigabit Network Connection\n");
cards_found++;
+
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:

View File

@ -1,113 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index 08e8e25..8070930 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -247,6 +247,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -864,6 +880,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
unsigned int total_bytes = 0, total_packets = 0;
u16 i, eop, count = 0;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
i = tx_ring->next_to_clean;
eop = tx_ring->tx_buffer_info[i].next_to_watch;
eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
@@ -1348,6 +1374,13 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
u16 cleaned_count = 0;
bool pkt_is_rsc = false;
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, work_done))
+ return;
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i);
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
@@ -2808,6 +2841,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3183,6 +3219,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(ring, IXGBE_DESC_UNUSED(ring));
}
@@ -3976,6 +4016,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4212,6 +4256,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
IXGBE_FLAG2_RESET_REQUESTED);
adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -7683,6 +7731,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
e_dev_info("Intel(R) 10 Gigabit Network Connection\n");
cards_found++;
+
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:

View File

@ -1,114 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index e1fcc95..1aab0df 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -249,6 +249,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -801,6 +817,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
unsigned int total_bytes = 0, total_packets = 0;
u16 i, eop, count = 0;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
+
i = tx_ring->next_to_clean;
eop = tx_ring->tx_buffer_info[i].next_to_watch;
eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
@@ -1303,6 +1330,13 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
u16 cleaned_count = 0;
bool pkt_is_rsc = false;
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, work_done))
+ return;
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i);
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
@@ -2676,6 +2710,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3039,6 +3076,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
}
@@ -3873,6 +3914,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4126,6 +4171,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
IXGBE_FLAG2_RESET_REQUESTED);
adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -7696,6 +7745,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
e_dev_info("Intel(R) 10 Gigabit Network Connection\n");
cards_found++;
+
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:

View File

@ -1,115 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index 8ef92d1..6a37803 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -188,6 +188,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -745,6 +761,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
unsigned int budget = q_vector->tx.work_limit;
u16 i = tx_ring->next_to_clean;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
+
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i);
@@ -1253,6 +1280,14 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
u16 cleaned_count = 0;
bool pkt_is_rsc = false;
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ int dummy;
+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, &dummy))
+ return true;
+#endif /* DEV_NETMAP */
i = rx_ring->next_to_clean;
rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i);
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
@@ -2420,6 +2455,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -2783,6 +2821,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
}
@@ -3757,6 +3799,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4007,6 +4053,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
IXGBE_FLAG2_RESET_REQUESTED);
adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -7710,6 +7760,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
e_dev_info("Intel(R) 10 Gigabit Network Connection\n");
cards_found++;
+
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:

View File

@ -1,124 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index 467948e..0aa1511 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -204,6 +204,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -749,6 +765,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
if (test_bit(__IXGBE_DOWN, &adapter->state))
return true;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
+
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IXGBE_TX_DESC(tx_ring, i);
i -= tx_ring->count;
@@ -1629,6 +1656,15 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
#endif /* IXGBE_FCOE */
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ int dummy;
+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy))
+ return true; /* no more interrupts */
+#endif /* DEV_NETMAP */
+
do {
struct ixgbe_rx_buffer *rx_buffer;
union ixgbe_adv_rx_desc *rx_desc;
@@ -2683,6 +2719,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3032,6 +3071,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
}
@@ -3986,6 +4029,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4249,6 +4296,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
IXGBE_FLAG2_RESET_REQUESTED);
adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4764,6 +4815,7 @@ static int ixgbe_open(struct net_device *netdev)
ixgbe_up_complete(adapter);
+
return 0;
err_req_irq:
@@ -7152,6 +7204,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
e_dev_info("%s\n", ixgbe_default_device_descr);
cards_found++;
+
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:

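In the newer ixgbe variants the RX guard passes a throwaway int because the caller ignores the count anyway. The companion hook, ixgbe_netmap_configure_rx_ring(), returns nonzero to make the driver skip ixgbe_alloc_rx_buffers() entirely: the descriptors were already pointed at netmap-owned buffers. A skeleton of such a hook built on netmap_reset(), the real netmap helper that yields the slot array only when the ring is in netmap mode; the descriptor programming is driver-specific and left as a comment:

/* Skeleton only; the real hook lives in ixgbe_netmap_linux.h. */
static int ixgbe_netmap_configure_rx_ring(struct ixgbe_adapter *adapter,
					  int reg_idx)
{
	struct netmap_adapter *na = NA(adapter->netdev);
	struct netmap_slot *slot = netmap_reset(na, NR_RX, reg_idx, 0);
	int i;

	if (!slot)
		return 0;	/* not in netmap mode: use the driver path */

	for (i = 0; i < na->num_rx_desc; i++) {
		/* Program descriptor i with the DMA address of the
		 * netmap buffer behind slot[i] (driver-specific). */
	}
	return 1;	/* ring filled: caller skips ixgbe_alloc_rx_buffers() */
}
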
View File

@ -1,123 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index e242104..02e1544 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -204,6 +204,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -764,6 +780,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
if (test_bit(__IXGBE_DOWN, &adapter->state))
return true;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
+
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IXGBE_TX_DESC(tx_ring, i);
i -= tx_ring->count;
@@ -1665,6 +1692,15 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
#endif /* IXGBE_FCOE */
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ int dummy;
+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy))
+ return true; /* no more interrupts */
+#endif /* DEV_NETMAP */
+
do {
struct ixgbe_rx_buffer *rx_buffer;
union ixgbe_adv_rx_desc *rx_desc;
@@ -2725,6 +2761,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3102,6 +3141,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
}
@@ -4051,6 +4094,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4315,6 +4362,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
IXGBE_FLAG2_RESET_REQUESTED);
adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4827,6 +4878,7 @@ static int ixgbe_open(struct net_device *netdev)
ixgbe_up_complete(adapter);
+
return 0;
err_req_irq:
@@ -7358,6 +7410,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
e_err(probe, "failed to allocate sysfs resources\n");
#endif /* CONFIG_IXGBE_HWMON */
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:

View File

@ -1,134 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index 79f4a26..4b8a25b 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -202,6 +202,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -826,6 +842,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
if (test_bit(__IXGBE_DOWN, &adapter->state))
return true;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
+
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IXGBE_TX_DESC(tx_ring, i);
i -= tx_ring->count;
@@ -1860,6 +1887,15 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
#endif /* IXGBE_FCOE */
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ int dummy;
+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy))
+ return true; /* no more interrupts */
+#endif /* DEV_NETMAP */
+
do {
union ixgbe_adv_rx_desc *rx_desc;
struct sk_buff *skb;
@@ -2846,6 +2882,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3207,6 +3246,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
}
@@ -4155,6 +4198,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4402,6 +4449,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
IXGBE_FLAG2_RESET_REQUESTED);
adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4976,6 +5027,7 @@ static int ixgbe_open(struct net_device *netdev)
ixgbe_up_complete(adapter);
+
return 0;
err_set_queues:
@@ -7619,6 +7671,10 @@ skip_sriov:
ixgbe_dbg_adapter_init(adapter);
#endif /* CONFIG_DEBUG_FS */
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:
@@ -7653,6 +7709,10 @@ static void ixgbe_remove(struct pci_dev *pdev)
struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
struct net_device *netdev = adapter->netdev;
+#ifdef DEV_NETMAP
+ netmap_detach(netdev);
+#endif /* DEV_NETMAP */
+
#ifdef CONFIG_DEBUG_FS
ixgbe_dbg_adapter_exit(adapter);
#endif /*CONFIG_DEBUG_FS */

View File

@ -1,123 +0,0 @@
diff --git a/ixgbe/ixgbe_main.c b/ixgbe/ixgbe_main.c
index d30fbdd..7418c57 100644
--- a/ixgbe/ixgbe_main.c
+++ b/ixgbe/ixgbe_main.c
@@ -248,6 +248,22 @@ static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
{}
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap_linux.h.
+ *
+ * The code was originally developed on FreeBSD, and in the interest
+ * of maintainability we try to limit differences between the two systems.
+ *
+ * <ixgbe_netmap_linux.h> contains functions for netmap support
+ * that extend the standard driver.
+ * It also defines DEV_NETMAP, so further conditional sections use
+ * that instead of CONFIG_NETMAP.
+ */
+#include <ixgbe_netmap_linux.h>
+#endif
/*
* ixgbe_regdump - register printout routine
@@ -872,6 +888,17 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
if (test_bit(__IXGBE_DOWN, &adapter->state))
return true;
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ */
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
+
tx_buffer = &tx_ring->tx_buffer_info[i];
tx_desc = IXGBE_TX_DESC(tx_ring, i);
i -= tx_ring->count;
@@ -1906,6 +1933,15 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
#endif /* IXGBE_FCOE */
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+#ifdef DEV_NETMAP
+ /*
+ * Same as the txeof routine: only wake up clients on interrupt.
+ */
+ int dummy;
+ if (netmap_rx_irq(rx_ring->netdev, rx_ring->queue_index, &dummy))
+ return true; /* no more interrupts */
+#endif /* DEV_NETMAP */
+
do {
union ixgbe_adv_rx_desc *rx_desc;
struct sk_buff *skb;
@@ -2905,6 +2941,9 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
if (!wait_loop)
e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_configure_tx_ring(adapter, reg_idx);
+#endif /* DEV_NETMAP */
}
static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3266,6 +3305,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
ixgbe_rx_desc_queue_enable(adapter, ring);
+#ifdef DEV_NETMAP
+ if (ixgbe_netmap_configure_rx_ring(adapter, reg_idx))
+ return;
+#endif /* DEV_NETMAP */
ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
}
@@ -4216,6 +4259,10 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(adapter->netdev);
+#endif
+
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4463,6 +4510,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(netdev);
+#endif
+
adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
IXGBE_FLAG2_RESET_REQUESTED);
adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -5037,6 +5088,7 @@ static int ixgbe_open(struct net_device *netdev)
ixgbe_up_complete(adapter);
+
return 0;
err_set_queues:
@@ -7658,6 +7710,10 @@ skip_sriov:
IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL,
true);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
+
return 0;
err_register:

View File

@ -1,117 +0,0 @@
diff --git a/r8169.c b/r8169.c
index 0fe2fc9..efee0a4 100644
--- a/r8169.c
+++ b/r8169.c
@@ -537,6 +537,10 @@ static int rtl8169_poll(struct napi_struct *napi, int budget);
static const unsigned int rtl8169_rx_config =
(RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift);
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_re_netmap_linux.h>
+#endif
+
static void mdio_write(void __iomem *ioaddr, int reg_addr, int value)
{
int i;
@@ -3210,6 +3214,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
device_set_wakeup_enable(&pdev->dev, tp->features & RTL_FEATURE_WOL);
+#ifdef DEV_NETMAP
+ re_netmap_attach(tp);
+#endif /* DEV_NETMAP */
+
out:
return rc;
@@ -3236,6 +3244,10 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
unregister_netdev(dev);
+#ifdef DEV_NETMAP
+ netmap_detach(dev);
+#endif /* DEV_NETMAP */
+
/* restore original MAC address */
rtl_rar_set(tp, dev->perm_addr);
@@ -3291,6 +3303,10 @@ static int rtl8169_open(struct net_device *dev)
napi_enable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
rtl_hw_start(dev);
rtl8169_request_timer(dev);
@@ -3993,6 +4009,11 @@ err_out:
static void rtl8169_rx_clear(struct rtl8169_private *tp)
{
unsigned int i;
+#ifdef DEV_NETMAP
+ re_netmap_tx_init(tp);
+ if (re_netmap_rx_init(tp))
+ return; /* success */
+#endif /* DEV_NETMAP */
for (i = 0; i < NUM_RX_DESC; i++) {
if (tp->Rx_skbuff[i]) {
@@ -4112,11 +4133,19 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev)
/* Wait for any pending NAPI task to complete */
napi_disable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
rtl8169_irq_mask_and_ack(ioaddr);
tp->intr_mask = 0xffff;
RTL_W16(IntrMask, tp->intr_event);
napi_enable(&tp->napi);
+
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(dev);
+#endif /* DEV_NETMAP */
}
static void rtl8169_reinit_task(struct work_struct *work)
@@ -4372,6 +4401,11 @@ static void rtl8169_tx_interrupt(struct net_device *dev,
{
unsigned int dirty_tx, tx_left;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(dev, 0))
+ return;
+#endif /* DEV_NETMAP */
+
dirty_tx = tp->dirty_tx;
smp_rmb();
tx_left = tp->cur_tx - dirty_tx;
@@ -4468,6 +4502,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
unsigned int cur_rx, rx_left;
unsigned int delta, count;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(dev, 0, &count))
+ return count;
+#endif /* DEV_NETMAP */
+
cur_rx = tp->cur_rx;
rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
rx_left = min(rx_left, budget);
@@ -4687,7 +4726,12 @@ static void rtl8169_down(struct net_device *dev)
napi_disable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
core_down:
+
spin_lock_irq(&tp->lock);
rtl8169_asic_down(ioaddr);

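The r8169 patch adds one wrinkle the Intel drivers do not show: rtl8169_wait_for_quiescence() wraps a chip reset, so netmap clients are parked right after NAPI is stopped and woken again only once NAPI is back. A sketch of that bracket, matching the ordering of the hunks above; the function itself is illustrative, not driver code:

/* Sketch of the reset bracket; the reset steps are driver-specific. */
static void example_reset(struct net_device *dev, struct napi_struct *napi)
{
	napi_disable(napi);		/* stop the kernel datapath */
#ifdef DEV_NETMAP
	netmap_disable_all_rings(dev);	/* then park netmap clients */
#endif
	/* ... mask interrupts, reset the chip, refill the rings ... */
	napi_enable(napi);
#ifdef DEV_NETMAP
	netmap_enable_all_rings(dev);	/* wake clients once all is up */
#endif
}
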
View File

@ -1,115 +0,0 @@
diff --git a/r8169.c b/r8169.c
index 53b13de..745a59d 100644
--- a/r8169.c
+++ b/r8169.c
@@ -535,6 +535,10 @@ static int rtl8169_poll(struct napi_struct *napi, int budget);
static const unsigned int rtl8169_rx_config =
(RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift);
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_re_netmap_linux.h>
+#endif
+
static void mdio_write(void __iomem *ioaddr, int reg_addr, int value)
{
int i;
@@ -3229,6 +3233,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_dev_run_wake(pdev))
pm_runtime_put_noidle(&pdev->dev);
+#ifdef DEV_NETMAP
+ re_netmap_attach(tp);
+#endif /* DEV_NETMAP */
+
out:
return rc;
@@ -3257,6 +3265,10 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
if (pci_dev_run_wake(pdev))
pm_runtime_get_noresume(&pdev->dev);
+#ifdef DEV_NETMAP
+ netmap_detach(dev);
+#endif /* DEV_NETMAP */
+
/* restore original MAC address */
rtl_rar_set(tp, dev->perm_addr);
@@ -3303,6 +3315,10 @@ static int rtl8169_open(struct net_device *dev)
napi_enable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
rtl_hw_start(dev);
rtl8169_request_timer(dev);
@@ -4018,6 +4034,11 @@ static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc)
static int rtl8169_rx_fill(struct rtl8169_private *tp)
{
unsigned int i;
+#ifdef DEV_NETMAP
+ re_netmap_tx_init(tp);
+ if (re_netmap_rx_init(tp))
+ return 0; /* success */
+#endif /* DEV_NETMAP */
for (i = 0; i < NUM_RX_DESC; i++) {
void *data;
@@ -4119,11 +4140,19 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev)
/* Wait for any pending NAPI task to complete */
napi_disable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
rtl8169_irq_mask_and_ack(ioaddr);
tp->intr_mask = 0xffff;
RTL_W16(IntrMask, tp->intr_event);
napi_enable(&tp->napi);
+
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(dev);
+#endif /* DEV_NETMAP */
}
static void rtl8169_reinit_task(struct work_struct *work)
@@ -4395,6 +4424,11 @@ static void rtl8169_tx_interrupt(struct net_device *dev,
{
unsigned int dirty_tx, tx_left;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(dev, 0))
+ return;
+#endif /* DEV_NETMAP */
+
dirty_tx = tp->dirty_tx;
smp_rmb();
tx_left = tp->cur_tx - dirty_tx;
@@ -4490,6 +4524,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
unsigned int count;
int polling = (budget != ~(u32)0) ? 1 : 0;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(dev, 0, &count))
+ return count;
+#endif /* DEV_NETMAP */
+
cur_rx = tp->cur_rx;
rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
rx_left = min(rx_left, budget);
@@ -4691,6 +4730,10 @@ static void rtl8169_down(struct net_device *dev)
napi_disable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
spin_lock_irq(&tp->lock);
rtl8169_asic_down(ioaddr);

View File

@ -1,114 +0,0 @@
diff --git a/r8169.c b/r8169.c
index 7ffdb80..6bae7e6 100644
--- a/r8169.c
+++ b/r8169.c
@@ -590,6 +590,10 @@ static int rtl8169_poll(struct napi_struct *napi, int budget);
static const unsigned int rtl8169_rx_config =
(RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift);
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_re_netmap_linux.h>
+#endif
+
static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
{
void __iomem *ioaddr = tp->mmio_addr;
@@ -3207,6 +3211,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_dev_run_wake(pdev))
pm_runtime_put_noidle(&pdev->dev);
+#ifdef DEV_NETMAP
+ re_netmap_attach(tp);
+#endif /* DEV_NETMAP */
+
netif_carrier_off(dev);
out:
@@ -3238,6 +3246,9 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
cancel_delayed_work_sync(&tp->task);
rtl_release_firmware(tp);
+#ifdef DEV_NETMAP
+ netmap_detach(dev);
+#endif /* DEV_NETMAP */
unregister_netdev(dev);
@@ -3291,6 +3302,10 @@ static int rtl8169_open(struct net_device *dev)
napi_enable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
rtl8169_init_phy(dev, tp);
/*
@@ -4074,6 +4089,11 @@ static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc)
static int rtl8169_rx_fill(struct rtl8169_private *tp)
{
unsigned int i;
+#ifdef DEV_NETMAP
+ re_netmap_tx_init(tp);
+ if (re_netmap_rx_init(tp))
+ return 0; /* success */
+#endif /* DEV_NETMAP */
for (i = 0; i < NUM_RX_DESC; i++) {
void *data;
@@ -4175,11 +4195,19 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev)
/* Wait for any pending NAPI task to complete */
napi_disable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
rtl8169_irq_mask_and_ack(ioaddr);
tp->intr_mask = 0xffff;
RTL_W16(IntrMask, tp->intr_event);
napi_enable(&tp->napi);
+
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(dev);
+#endif /* DEV_NETMAP */
}
static void rtl8169_reinit_task(struct work_struct *work)
@@ -4452,6 +4480,11 @@ static void rtl8169_tx_interrupt(struct net_device *dev,
{
unsigned int dirty_tx, tx_left;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(dev, 0))
+ return;
+#endif /* DEV_NETMAP */
+
dirty_tx = tp->dirty_tx;
smp_rmb();
tx_left = tp->cur_tx - dirty_tx;
@@ -4547,6 +4580,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
unsigned int count;
int polling = (budget != ~(u32)0) ? 1 : 0;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(dev, 0, &count))
+ return count;
+#endif /* DEV_NETMAP */
+
cur_rx = tp->cur_rx;
rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
rx_left = min(rx_left, budget);
@@ -4769,6 +4807,10 @@ static void rtl8169_down(struct net_device *dev)
napi_disable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
spin_lock_irq(&tp->lock);
rtl8169_asic_down(ioaddr);

View File

@ -1,114 +0,0 @@
diff --git a/r8169.c b/r8169.c
index c8f47f1..a41e878 100644
--- a/r8169.c
+++ b/r8169.c
@@ -787,6 +787,10 @@ static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force)
}
}
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <if_re_netmap_linux.h>
+#endif
+
static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
{
void __iomem *ioaddr = tp->mmio_addr;
@@ -4167,6 +4171,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_dev_run_wake(pdev))
pm_runtime_put_noidle(&pdev->dev);
+#ifdef DEV_NETMAP
+ re_netmap_attach(tp);
+#endif /* DEV_NETMAP */
+
netif_carrier_off(dev);
out:
@@ -4201,6 +4209,9 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
unregister_netdev(dev);
rtl_release_firmware(tp);
+#ifdef DEV_NETMAP
+ netmap_detach(dev);
+#endif /* DEV_NETMAP */
if (pci_dev_run_wake(pdev))
pm_runtime_get_noresume(&pdev->dev);
@@ -4298,6 +4309,10 @@ static int rtl8169_open(struct net_device *dev)
napi_enable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
rtl8169_init_phy(dev, tp);
rtl8169_set_features(dev, dev->features);
@@ -5252,6 +5267,11 @@ static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc)
static int rtl8169_rx_fill(struct rtl8169_private *tp)
{
unsigned int i;
+#ifdef DEV_NETMAP
+ re_netmap_tx_init(tp);
+ if (re_netmap_rx_init(tp))
+ return 0; /* success */
+#endif /* DEV_NETMAP */
for (i = 0; i < NUM_RX_DESC; i++) {
void *data;
@@ -5348,11 +5368,19 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev)
/* Wait for any pending NAPI task to complete */
napi_disable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
rtl8169_irq_mask_and_ack(tp);
tp->intr_mask = 0xffff;
RTL_W16(IntrMask, tp->intr_event);
napi_enable(&tp->napi);
+
+#ifdef DEV_NETMAP
+ netmap_enable_all_rings(dev);
+#endif /* DEV_NETMAP */
}
static void rtl8169_reinit_task(struct work_struct *work)
@@ -5627,6 +5655,11 @@ static void rtl8169_tx_interrupt(struct net_device *dev,
{
unsigned int dirty_tx, tx_left;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(dev, 0))
+ return;
+#endif /* DEV_NETMAP */
+
dirty_tx = tp->dirty_tx;
smp_rmb();
tx_left = tp->cur_tx - dirty_tx;
@@ -5714,6 +5747,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
unsigned int cur_rx, rx_left;
unsigned int count;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(dev, 0, &count))
+ return count;
+#endif /* DEV_NETMAP */
+
cur_rx = tp->cur_rx;
rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
rx_left = min(rx_left, budget);
@@ -5920,6 +5958,10 @@ static void rtl8169_down(struct net_device *dev)
napi_disable(&tp->napi);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif /* DEV_NETMAP */
+
spin_lock_irq(&tp->lock);
rtl8169_hw_reset(tp);

View File

@ -1,85 +0,0 @@
diff --git a/virtio_net.c b/virtio_net.c
index b0577dd..6516934 100644
--- a/virtio_net.c
+++ b/virtio_net.c
@@ -64,6 +64,10 @@ struct virtnet_info
struct page *pages;
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <virtio_netmap.h>
+#endif
+
struct skb_vnet_hdr {
union {
struct virtio_net_hdr hdr;
@@ -121,6 +125,10 @@ static void skb_xmit_done(struct virtqueue *svq)
/* Suppress further interrupts. */
svq->vq_ops->disable_cb(svq);
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(vi->dev, 0))
+ return;
+#endif
/* We were probably waiting for more output buffers. */
netif_wake_queue(vi->dev);
}
@@ -470,7 +478,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
void *buf;
unsigned int len, received = 0;
+#ifdef DEV_NETMAP
+ int work_done = 0;
+ if (netmap_rx_irq(vi->dev, 0, &work_done)) {
+ napi_complete(napi);
+ ND("called netmap_rx_irq");
+
+ return 1;
+ }
+#endif
again:
while (received < budget &&
(buf = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
@@ -638,6 +655,10 @@ static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ virtio_netmap_init_buffers(vi);
+ netmap_enable_all_rings(dev);
+#endif
napi_enable(&vi->napi);
/* If all buffers were filled by other side before we napi_enabled, we
@@ -700,6 +721,9 @@ static int virtnet_close(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif
napi_disable(&vi->napi);
return 0;
@@ -985,6 +1009,10 @@ static int virtnet_probe(struct virtio_device *vdev)
goto unregister;
}
+#ifdef DEV_NETMAP
+ virtio_netmap_attach(vi);
+#endif
+
vi->status = VIRTIO_NET_S_LINK_UP;
virtnet_update_status(vi);
netif_carrier_on(dev);
@@ -1028,6 +1056,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
+#ifdef DEV_NETMAP
+ netmap_detach(vi->dev);
+#endif
/* Stop all the virtqueues. */
vdev->config->reset(vdev);

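virtio has no ring registers to reprogram, so the RX interrupt interception sits entirely at the top of virtnet_poll(): when netmap owns the ring the handler immediately calls napi_complete(), which re-arms the virtqueue callback, and reports a token unit of work so it stays below budget. A sketch of just that guard; apart from the netmap calls this is the stock virtio structure:

/* Sketch of the poll-side guard used by every virtio variant here. */
static int example_virtnet_poll(struct napi_struct *napi, int budget)
{
	struct virtnet_info *vi =
		container_of(napi, struct virtnet_info, napi);
	int work_done = 0;

	if (netmap_rx_irq(vi->dev, 0, &work_done)) {
		napi_complete(napi);	/* re-arm the virtqueue callback */
		return 1;		/* token progress, below budget */
	}
	/* ... stock virtio receive loop up to budget ... */
	return work_done;
}
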
View File

@ -1,85 +0,0 @@
diff --git a/virtio_net.c b/virtio_net.c
index b6d4028..a9be38d 100644
--- a/virtio_net.c
+++ b/virtio_net.c
@@ -67,6 +67,10 @@ struct virtnet_info {
struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <virtio_netmap.h>
+#endif
+
struct skb_vnet_hdr {
union {
struct virtio_net_hdr hdr;
@@ -124,6 +128,10 @@ static void skb_xmit_done(struct virtqueue *svq)
/* Suppress further interrupts. */
virtqueue_disable_cb(svq);
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(vi->dev, 0))
+ return;
+#endif
/* We were probably waiting for more output buffers. */
netif_wake_queue(vi->dev);
}
@@ -467,7 +475,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
void *buf;
unsigned int len, received = 0;
+#ifdef DEV_NETMAP
+ int work_done = 0;
+ if (netmap_rx_irq(vi->dev, 0, &work_done)) {
+ napi_complete(napi);
+ ND("called netmap_rx_irq");
+
+ return 1;
+ }
+#endif
again:
while (received < budget &&
(buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
@@ -638,6 +655,10 @@ static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ virtio_netmap_init_buffers(vi);
+ netmap_enable_all_rings(dev);
+#endif
napi_enable(&vi->napi);
/* If all buffers were filled by other side before we napi_enabled, we
@@ -700,6 +721,9 @@ static int virtnet_close(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif
napi_disable(&vi->napi);
return 0;
@@ -986,6 +1010,10 @@ static int virtnet_probe(struct virtio_device *vdev)
goto unregister;
}
+#ifdef DEV_NETMAP
+ virtio_netmap_attach(vi);
+#endif
+
/* Assume link up if device can't report link status,
otherwise get link status from config. */
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -1035,6 +1063,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
+#ifdef DEV_NETMAP
+ netmap_detach(vi->dev);
+#endif
/* Stop all the virtqueues. */
vdev->config->reset(vdev);

View File

@ -1,85 +0,0 @@
diff --git a/virtio_net.c b/virtio_net.c
index 82dba5a..f217797 100644
--- a/virtio_net.c
+++ b/virtio_net.c
@@ -67,6 +67,10 @@ struct virtnet_info {
struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <virtio_netmap.h>
+#endif
+
struct skb_vnet_hdr {
union {
struct virtio_net_hdr hdr;
@@ -124,6 +128,10 @@ static void skb_xmit_done(struct virtqueue *svq)
/* Suppress further interrupts. */
virtqueue_disable_cb(svq);
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(vi->dev, 0))
+ return;
+#endif
/* We were probably waiting for more output buffers. */
netif_wake_queue(vi->dev);
}
@@ -481,7 +489,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
void *buf;
unsigned int len, received = 0;
+#ifdef DEV_NETMAP
+ int work_done = 0;
+ if (netmap_rx_irq(vi->dev, 0, &work_done)) {
+ napi_complete(napi);
+ ND("called netmap_rx_irq");
+
+ return 1;
+ }
+#endif
again:
while (received < budget &&
(buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
@@ -652,6 +669,10 @@ static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ virtio_netmap_init_buffers(vi);
+ netmap_enable_all_rings(dev);
+#endif
virtnet_napi_enable(vi);
return 0;
}
@@ -705,6 +726,9 @@ static int virtnet_close(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif
napi_disable(&vi->napi);
return 0;
@@ -991,6 +1015,10 @@ static int virtnet_probe(struct virtio_device *vdev)
goto unregister;
}
+#ifdef DEV_NETMAP
+ virtio_netmap_attach(vi);
+#endif
+
/* Assume link up if device can't report link status,
otherwise get link status from config. */
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -1040,6 +1068,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
+#ifdef DEV_NETMAP
+ netmap_detach(vi->dev);
+#endif
/* Stop all the virtqueues. */
vdev->config->reset(vdev);


@@ -1,90 +0,0 @@
diff --git a/virtio_net.c b/virtio_net.c
index 4880aa8..6329c3a 100644
--- a/virtio_net.c
+++ b/virtio_net.c
@@ -80,6 +80,10 @@ struct virtnet_info {
struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <virtio_netmap.h>
+#endif
+
struct skb_vnet_hdr {
union {
struct virtio_net_hdr hdr;
@@ -137,6 +141,10 @@ static void skb_xmit_done(struct virtqueue *svq)
/* Suppress further interrupts. */
virtqueue_disable_cb(svq);
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(vi->dev, 0))
+ return;
+#endif
/* We were probably waiting for more output buffers. */
netif_wake_queue(vi->dev);
}
@@ -517,7 +525,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
void *buf;
unsigned int len, received = 0;
+#ifdef DEV_NETMAP
+ int work_done = 0;
+ if (netmap_rx_irq(vi->dev, 0, &work_done)) {
+ napi_complete(napi);
+ ND("called netmap_rx_irq");
+
+ return 1;
+ }
+#endif
again:
while (received < budget &&
(buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
@@ -727,7 +744,15 @@ static void virtnet_netpoll(struct net_device *dev)
static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ int ok = virtio_netmap_init_buffers(vi);
+ netmap_enable_all_rings(dev);
+ if (ok) {
+ virtnet_napi_enable(vi);
+ return 0;
+ }
+#endif
/* Make sure we have some buffers: if oom use wq. */
if (!try_fill_recv(vi, GFP_KERNEL))
queue_delayed_work(system_nrt_wq, &vi->refill, 0);
@@ -785,6 +810,9 @@ static int virtnet_close(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
napi_disable(&vi->napi);
@@ -1107,6 +1135,10 @@ static int virtnet_probe(struct virtio_device *vdev)
goto unregister;
}
+#ifdef DEV_NETMAP
+ virtio_netmap_attach(vi);
+#endif
+
/* Assume link up if device can't report link status,
otherwise get link status from config. */
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -1170,6 +1202,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
+#ifdef DEV_NETMAP
+ netmap_detach(vi->dev);
+#endif
unregister_netdev(vi->dev);
remove_vq_common(vi);


@@ -1,90 +0,0 @@
diff --git a/virtio_net.c b/virtio_net.c
index f18149a..95e1580 100644
--- a/virtio_net.c
+++ b/virtio_net.c
@@ -90,6 +90,10 @@ struct virtnet_info {
struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <virtio_netmap.h>
+#endif
+
struct skb_vnet_hdr {
union {
struct virtio_net_hdr hdr;
@@ -147,6 +151,10 @@ static void skb_xmit_done(struct virtqueue *svq)
/* Suppress further interrupts. */
virtqueue_disable_cb(svq);
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(vi->dev, 0))
+ return;
+#endif
/* We were probably waiting for more output buffers. */
netif_wake_queue(vi->dev);
}
@@ -529,7 +537,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
void *buf;
unsigned int len, received = 0;
+#ifdef DEV_NETMAP
+ int work_done = 0;
+ if (netmap_rx_irq(vi->dev, 0, &work_done)) {
+ napi_complete(napi);
+ ND("called netmap_rx_irq");
+
+ return 1;
+ }
+#endif
again:
while (received < budget &&
(buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
@@ -742,6 +759,15 @@ static void virtnet_netpoll(struct net_device *dev)
static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ int ok = virtio_netmap_init_buffers(vi);
+
+ netmap_enable_all_rings(dev);
+ if (ok) {
+ virtnet_napi_enable(vi);
+ return 0;
+ }
+#endif
/* Make sure we have some buffers: if oom use wq. */
if (!try_fill_recv(vi, GFP_KERNEL))
@@ -810,6 +836,9 @@ static int virtnet_close(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
napi_disable(&vi->napi);
@@ -1148,6 +1177,10 @@ static int virtnet_probe(struct virtio_device *vdev)
goto unregister;
}
+#ifdef DEV_NETMAP
+ virtio_netmap_attach(vi);
+#endif
+
/* Assume link up if device can't report link status,
otherwise get link status from config. */
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -1211,6 +1244,9 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
+#ifdef DEV_NETMAP
+ netmap_detach(vi->dev);
+#endif
/* Prevent config work handler from accessing the device. */
mutex_lock(&vi->config_lock);
vi->config_enable = false;


@@ -1,91 +0,0 @@
diff --git a/virtio_net.c b/virtio_net.c
index 35c00c5..8aaaa7e 100644
--- a/virtio_net.c
+++ b/virtio_net.c
@@ -132,6 +132,10 @@ struct virtnet_info {
struct notifier_block nb;
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <virtio_netmap.h>
+#endif
+
struct skb_vnet_hdr {
union {
struct virtio_net_hdr hdr;
@@ -211,6 +215,10 @@ static void skb_xmit_done(struct virtqueue *vq)
/* Suppress further interrupts. */
virtqueue_disable_cb(vq);
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(vi->dev, vq2txq(vq)))
+ return;
+#endif
/* We were probably waiting for more output buffers. */
netif_wake_subqueue(vi->dev, vq2txq(vq));
}
@@ -603,7 +611,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct virtnet_info *vi = rq->vq->vdev->priv;
void *buf;
unsigned int len, received = 0;
+#ifdef DEV_NETMAP
+ int work_done = 0;
+ if (netmap_rx_irq(vi->dev, vq2rxq(rq->vq), &work_done)) {
+ napi_complete(napi);
+ ND("called netmap_rx_irq");
+
+ return 1;
+ }
+#endif
again:
while (received < budget &&
(buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
@@ -635,6 +652,16 @@ static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
int i;
+#ifdef DEV_NETMAP
+ int ok = virtio_netmap_init_buffers(vi);
+
+ netmap_enable_all_rings(dev);
+ if (ok) {
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ virtnet_napi_enable(&vi->rq[i]);
+ return 0;
+ }
+#endif
for (i = 0; i < vi->max_queue_pairs; i++) {
/* Make sure we have some buffers: if oom use wq. */
@@ -909,6 +936,9 @@ static int virtnet_close(struct net_device *dev)
struct virtnet_info *vi = netdev_priv(dev);
int i;
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
@@ -1572,6 +1602,10 @@ static int virtnet_probe(struct virtio_device *vdev)
goto free_recv_bufs;
}
+#ifdef DEV_NETMAP
+ virtio_netmap_attach(vi);
+#endif
+
/* Assume link up if device can't report link status,
otherwise get link status from config. */
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -1618,6 +1652,9 @@ static void virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
+#ifdef DEV_NETMAP
+ netmap_detach(vi->dev);
+#endif
unregister_hotcpu_notifier(&vi->nb);
/* Prevent config work handler from accessing the device. */


@@ -1,91 +0,0 @@
diff --git a/virtio_net.c b/virtio_net.c
index 3d2a90a..ae899a4 100644
--- a/virtio_net.c
+++ b/virtio_net.c
@@ -131,6 +131,10 @@ struct virtnet_info {
struct notifier_block nb;
};
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <virtio_netmap.h>
+#endif
+
struct skb_vnet_hdr {
union {
struct virtio_net_hdr hdr;
@@ -210,6 +214,10 @@ static void skb_xmit_done(struct virtqueue *vq)
/* Suppress further interrupts. */
virtqueue_disable_cb(vq);
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(vi->dev, vq2txq(vq)))
+ return;
+#endif
/* We were probably waiting for more output buffers. */
netif_wake_subqueue(vi->dev, vq2txq(vq));
}
@@ -603,7 +611,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct virtnet_info *vi = rq->vq->vdev->priv;
void *buf;
unsigned int r, len, received = 0;
+#ifdef DEV_NETMAP
+ int work_done = 0;
+ if (netmap_rx_irq(vi->dev, vq2rxq(rq->vq), &work_done)) {
+ napi_complete(napi);
+ ND("called netmap_rx_irq");
+
+ return 1;
+ }
+#endif
again:
while (received < budget &&
(buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
@@ -636,6 +653,16 @@ static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
int i;
+#ifdef DEV_NETMAP
+ int ok = virtio_netmap_init_buffers(vi);
+
+ netmap_enable_all_rings(dev);
+ if (ok) {
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ virtnet_napi_enable(&vi->rq[i]);
+ return 0;
+ }
+#endif
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
@@ -927,6 +954,9 @@ static int virtnet_close(struct net_device *dev)
struct virtnet_info *vi = netdev_priv(dev);
int i;
+#ifdef DEV_NETMAP
+ netmap_disable_all_rings(dev);
+#endif
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
@@ -1592,6 +1622,10 @@ static int virtnet_probe(struct virtio_device *vdev)
goto free_recv_bufs;
}
+#ifdef DEV_NETMAP
+ virtio_netmap_attach(vi);
+#endif
+
/* Assume link up if device can't report link status,
otherwise get link status from config. */
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -1638,6 +1672,9 @@ static void virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
+#ifdef DEV_NETMAP
+ netmap_detach(vi->dev);
+#endif
unregister_hotcpu_notifier(&vi->nb);
/* Prevent config work handler from accessing the device. */


@@ -1,407 +0,0 @@
/*
* Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Id: forcedeth_netmap.h 10670 2012-02-27 21:15:38Z luigi $
*
* netmap support for: forcedeth (nfe, linux)
* For details on netmap support see ixgbe_netmap.h
The driver supports ORIGinal and EXtended descriptors through unions.
We remove the .orig and .ex suffix for brevity.
Pointers in the ring (N slots) are
first_rx = 0, last_rx = N-1, get_rx = put_rx = 0 at init
Following init there is a call to nv_alloc_rx_optimized() which does
less_rx = get_rx - 1
for (put_rx = 0; put_rx != less_rx; put_rx++)
put_rx.flags = LEN | NV_RX2_AVAIL;
so it leaves one free slot and put_rx pointing at the end.
Basically, get_rx is where new packets arrive, put_rx is where
new buffers are added.
The rx_intr aka nv_rx_process_optimized() scans
while (get_rx != put_rx && !(get_rx.flags & NV_RX2_AVAIL)) {
...
get_rx++
}
followed by a nv_alloc_rx_optimized().
This makes sure that there is always a free slot.
*/
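/*
 * Illustrative sketch only (not driver code), restating the invariant
 * above in pseudo-C. deliver(), refill_len and the modular arithmetic
 * are assumptions for illustration; get_rx, put_rx and NV_RX2_AVAIL
 * are the real driver names:
 *
 *	// rx interrupt: consume completed slots
 *	while (get_rx != put_rx && !(ring[get_rx].flags & NV_RX2_AVAIL)) {
 *		deliver(&ring[get_rx]);		// packet completed by the NIC
 *		get_rx = (get_rx + 1) % N;
 *	}
 *	// refill: hand buffers back, always keeping one slot free
 *	while ((put_rx + 1) % N != get_rx) {
 *		ring[put_rx].flags = refill_len | NV_RX2_AVAIL;
 *		put_rx = (put_rx + 1) % N;
 *	}
 */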
#include <bsd_glue.h>
#include <net/netmap.h>
#include <netmap/netmap_kern.h>
#define SOFTC_T fe_priv
/*
* Register/unregister. We are already under netmap lock.
* Only called on the first register or the last unregister.
* The "forcedeth" driver is poorly written: the reinit routine
* is replicated multiple times, and one way to trigger a reinit
* is to call nv_change_mtu() twice with an MTU above ETH_DATA_LEN.
*/
static int
forcedeth_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *np = netdev_priv(ifp);
u8 __iomem *base = get_hwbase(ifp);
// first half of nv_change_mtu() - down
nv_disable_irq(ifp);
nv_napi_disable(ifp);
netif_tx_lock_bh(ifp);
netif_addr_lock(ifp);
spin_lock(&np->lock);
/* stop engines */
nv_stop_rxtx(ifp);
nv_txrx_reset(ifp);
/* drain rx queue */
nv_drain_rxtx(ifp);
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
// second half of nv_change_mtu() -- up
if (nv_init_ring(ifp)) {
if (!np->in_shutdown)
mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
}
/* reinit nic view of the rx queue */
writel(np->rx_buf_sz, base + NvRegOffloadConfig);
setup_hw_rings(ifp, NV_SETUP_RX_RING | NV_SETUP_TX_RING);
writel(((np->rx_ring_size-1) << NVREG_RINGSZ_RXSHIFT) + ((np->tx_ring_size-1) << NVREG_RINGSZ_TXSHIFT),
base + NvRegRingSizes);
pci_push(base);
writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(ifp) + NvRegTxRxControl);
pci_push(base);
/* restart rx engine */
nv_start_rxtx(ifp);
spin_unlock(&np->lock);
netif_addr_unlock(ifp);
netif_tx_unlock_bh(ifp);
nv_napi_enable(ifp);
nv_enable_irq(ifp);
return (0);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
forcedeth_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* device-specific */
struct SOFTC_T *np = netdev_priv(ifp);
struct ring_desc_ex *txr = np->tx_ring.ex;
uint32_t lastpkt = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET);
u_int k;
/*
* First part: process new packets to send.
*/
if (!netif_carrier_ok(ifp)) {
goto out;
}
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = np->put_tx.ex - txr; // NIC pointer
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
struct ring_desc_ex *put_tx = txr + nic_i;
// XXX check who needs lastpkt
int cmd = (len - 1) | NV_TX2_VALID | lastpkt;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
put_tx->bufhigh = htole32(dma_high(paddr));
put_tx->buflow = htole32(dma_low(paddr));
put_tx->flaglen = htole32(cmd);
put_tx->txvlan = 0;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
np->put_tx.ex = txr + nic_i;
kring->nr_hwcur = head;
wmb(); /* synchronize writes to the NIC ring */
/* restart the tx unit (XXX where is the new index ?) */
writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits,
get_hwbase(ifp) + NvRegTxRxControl);
}
/*
* Second part: reclaim buffers for completed transmissions
*/
/* Sync the TX descriptor list */
rmb();
nic_i = np->get_tx.ex - txr;
k = np->put_tx.ex - txr;
if (nic_i != k) {
for (n = 0; nic_i != k; n++) {
uint32_t cmdstat = le32toh(txr[nic_i].flaglen);
if (cmdstat & NV_TX2_VALID)
break;
if (++nic_i == np->tx_ring_size)
nic_i = 0;
}
if (n > 0) {
np->get_tx.ex = txr + nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
out:
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
forcedeth_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct SOFTC_T *np = netdev_priv(ifp);
struct ring_desc_ex *rxr = np->rx_ring.ex;
u_int refill; // refill position
if (head > lim)
return netmap_ring_reinit(kring);
/*
* First part: import newly received packets.
*/
rmb();
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = np->get_rx.ex - rxr; /* next pkt to check */
/* put_rx is the refill position, one before nr_hwcur.
* This slot is not available
*/
refill = np->put_rx.ex - rxr; /* refill position */
nm_i = netmap_idx_n2k(kring, nic_i);
while (nic_i != refill) {
uint32_t statlen = le32toh(rxr[nic_i].flaglen);
if (statlen & NV_RX2_AVAIL) /* still owned by the NIC */
break;
ring->slot[nm_i].len = statlen & LEN_MASK_V2; // XXX crc?
ring->slot[nm_i].flags = slot_flags;
// ifp->stats.rx_packets++;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
np->get_rx.ex = rxr + nic_i;
kring->nr_hwtail = nm_i;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur; // refill is one before nic_i
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
refill = np->put_rx.ex - rxr; /* refill position */
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
struct ring_desc_ex *desc = rxr + nic_i;
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
desc->flaglen = htole32(NETMAP_BUF_SIZE);
desc->bufhigh = htole32(dma_high(paddr));
desc->buflow = htole32(dma_low(paddr));
// enable the previous buffer
rxr[refill].flaglen |= htole32(NV_RX2_AVAIL);
refill = nm_next(refill, lim);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
np->put_rx.ex = rxr + refill;
/* Flush the RX DMA ring */
wmb();
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/*
* Additional routines to init the tx and rx rings.
* In other drivers we do that inline in the main code.
*/
static int
forcedeth_netmap_tx_init(struct SOFTC_T *np)
{
struct ring_desc_ex *desc;
int i, n;
struct netmap_adapter *na = NA(np->dev);
struct netmap_slot *slot;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
slot = netmap_reset(na, NR_TX, 0, 0);
/* slot is NULL if we are not in netmap mode */
if (!slot)
return 0;
/* in netmap mode, overwrite addresses and maps */
//txd = np->rl_ldata.rl_tx_desc;
desc = np->tx_ring.ex;
n = np->tx_ring_size;
/* l points in the netmap ring, i points in the NIC ring */
for (i = 0; i < n; i++) {
int l = netmap_idx_n2k(&na->tx_rings[0], i);
uint64_t paddr;
PNMB(slot + l, &paddr);
desc[i].flaglen = 0;
desc[i].bufhigh = htole32(dma_high(paddr));
desc[i].buflow = htole32(dma_low(paddr));
}
return 1;
}
static int
forcedeth_netmap_rx_init(struct SOFTC_T *np)
{
struct netmap_adapter *na = NA(np->dev);
struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
struct ring_desc_ex *desc = np->rx_ring.ex;
uint32_t cmdstat;
int i, lim;
if (!slot)
return 0;
/*
* Do not release the slots owned by userspace,
* and also keep one empty.
*/
lim = np->rx_ring_size - 1 - nm_kr_rxspace(&na->rx_rings[0]);
for (i = 0; i < np->rx_ring_size; i++) {
void *addr;
uint64_t paddr;
int l = netmap_idx_n2k(&na->rx_rings[0], i);
addr = PNMB(slot + l, &paddr);
netmap_reload_map(np->rl_ldata.rl_rx_mtag,
np->rl_ldata.rl_rx_desc[i].rx_dmamap, addr);
desc[i].bufhigh = htole32(dma_high(paddr));
desc[i].buflow = htole32(dma_low(paddr));
cmdstat = NETMAP_BUF_SIZE;
if (i < lim)
cmdstat |= NV_RX2_AVAIL;
desc[i].flaglen = htole32(cmdstat);
}
// XXX ring end anywhere ?
np->get_rx.ex = desc;
np->put_rx.ex = desc + lim;
return 1;
}
static void
forcedeth_netmap_attach(struct SOFTC_T *np)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = np->dev;
na.num_tx_desc = np->tx_ring_size;
na.num_rx_desc = np->rx_ring_size;
na.nm_txsync = forcedeth_netmap_txsync;
na.nm_rxsync = forcedeth_netmap_rxsync;
na.nm_register = forcedeth_netmap_reg;
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */


@@ -1,352 +0,0 @@
/*
* Copyright (C) 2012-2014 Gaetano Catalli, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Id: if_e1000_netmap.h 10878 2012-04-12 22:28:48Z luigi $
*
* netmap support for: e1000 (linux version)
* For details on netmap support please see ixgbe_netmap.h
*/
#include <bsd_glue.h>
#include <net/netmap.h>
#include <netmap/netmap_kern.h>
#define SOFTC_T e1000_adapter
/*
* Register/unregister. We are already under netmap lock.
*/
static int
e1000_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *adapter = netdev_priv(ifp);
/* protect against other reinit */
while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
usleep_range(1000, 2000);
rtnl_lock();
if (netif_running(adapter->netdev))
e1000_down(adapter);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
if (netif_running(adapter->netdev))
e1000_up(adapter);
else
e1000_reset(adapter);
rtnl_unlock();
clear_bit(__E1000_RESETTING, &adapter->flags);
return (0);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
e1000_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct SOFTC_T *adapter = netdev_priv(ifp);
struct e1000_tx_ring* txr = &adapter->tx_ring[ring_nr];
rmb();
/*
* First part: process new packets to send.
*/
if (!netif_carrier_ok(ifp)) {
goto out;
}
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
struct e1000_tx_desc *curr = E1000_TX_DESC(*txr, nic_i);
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, paddr);
curr->buffer_addr = htole64(paddr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->upper.data = 0;
curr->lower.data = htole32(adapter->txd_cmd |
len | flags |
E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
wmb(); /* synchronize writes to the NIC ring */
txr->next_to_use = nic_i; /* XXX what for ? */
/* (re)start the tx unit up to slot nic_i (excluded) */
writel(nic_i, adapter->hw.hw_addr + txr->tdt);
mmiowb(); // XXX where do we need this ?
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = readl(adapter->hw.hw_addr + txr->tdh);
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
txr->next_to_clean = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
out:
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
e1000_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct SOFTC_T *adapter = netdev_priv(ifp);
struct e1000_rx_ring *rxr = &adapter->rx_ring[ring_nr];
if (!netif_carrier_ok(ifp)) {
goto out;
}
if (head > lim)
return netmap_ring_reinit(kring);
rmb();
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_clean;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
struct e1000_rx_desc *curr = E1000_RX_DESC(*rxr, nic_i);
uint32_t staterr = le32toh(curr->status);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->length) - 4;
ring->slot[nm_i].flags = slot_flags;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_clean = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
struct e1000_rx_desc *curr = E1000_RX_DESC(*rxr, nic_i);
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
// netmap_reload_map(...)
curr->buffer_addr = htole64(paddr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
rxr->next_to_use = nic_i; // XXX not really used
wmb();
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
writel(nic_i, adapter->hw.hw_addr + rxr->rdt);
}
out:
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/* diagnostic routine to catch errors */
static void e1000_no_rx_alloc(struct SOFTC_T *adapter,
struct e1000_rx_ring *rxr, int cleaned_count)
{
D("e1000->alloc_rx_buf should not be called");
}
/*
* Make the tx and rx rings point to the netmap buffers.
*/
static int e1000_netmap_init_buffers(struct SOFTC_T *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct ifnet *ifp = adapter->netdev;
struct netmap_adapter* na = NA(ifp);
struct netmap_slot* slot;
struct e1000_tx_ring* txr = &adapter->tx_ring[0];
unsigned int i, r, si;
uint64_t paddr;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
adapter->alloc_rx_buf = e1000_no_rx_alloc;
for (r = 0; r < na->num_rx_rings; r++) {
struct e1000_rx_ring *rxr;
slot = netmap_reset(na, NR_RX, r, 0);
if (!slot) {
D("strange, null netmap ring %d", r);
return 0;
}
rxr = &adapter->rx_ring[r];
for (i = 0; i < rxr->count; i++) {
// XXX the skb check and cleanup can go away
struct e1000_buffer *bi = &rxr->buffer_info[i];
si = netmap_idx_n2k(&na->rx_rings[r], i);
PNMB(slot + si, &paddr);
if (bi->skb)
D("rx buf %d was set", i);
bi->skb = NULL;
// netmap_load_map(...)
E1000_RX_DESC(*rxr, i)->buffer_addr = htole64(paddr);
}
rxr->next_to_use = 0;
/* preserve buffers already made available to clients */
i = rxr->count - 1 - nm_kr_rxspace(&na->rx_rings[0]);
if (i < 0) // XXX something wrong here, can it really happen ?
i += rxr->count;
D("i now is %d", i);
wmb(); /* Force memory writes to complete */
writel(i, hw->hw_addr + rxr->rdt);
}
/* now initialize the tx ring(s) */
slot = netmap_reset(na, NR_TX, 0, 0);
for (i = 0; i < na->num_tx_desc; i++) {
si = netmap_idx_n2k(&na->tx_rings[0], i);
PNMB(slot + si, &paddr);
// netmap_load_map(...)
E1000_TX_DESC(*txr, i)->buffer_addr = htole64(paddr);
}
return 1;
}
static void
e1000_netmap_attach(struct SOFTC_T *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->netdev;
na.num_tx_desc = adapter->tx_ring[0].count;
na.num_rx_desc = adapter->rx_ring[0].count;
na.nm_register = e1000_netmap_reg;
na.nm_txsync = e1000_netmap_txsync;
na.nm_rxsync = e1000_netmap_rxsync;
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */


@@ -1,377 +0,0 @@
/*
* Copyright (C) 2012-2014 Gaetano Catalli, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Id: if_e1000e_netmap.h 10670 2012-02-27 21:15:38Z luigi $
*
* netmap support for: e1000e (linux version)
* For details on netmap support please see ixgbe_netmap.h
* The driver supports 1 TX and 1 RX ring. Single lock.
* tx buffer address only written on change.
* Apparently the driver uses extended descriptors on rx from 3.2.32.
* Rx Crc stripping ?
*/
#include <bsd_glue.h>
#include <net/netmap.h>
#include <netmap/netmap_kern.h>
#define SOFTC_T e1000_adapter
/*
* Adaptation to different versions of the driver.
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0)
#warning this driver uses extended descriptors
#define NM_E1K_RX_DESC_T union e1000_rx_desc_extended
#define NM_E1R_RX_STATUS wb.upper.status_error
#define NM_E1R_RX_LENGTH wb.upper.length
#define NM_E1R_RX_BUFADDR read.buffer_addr
#else
#warning this driver uses regular descriptors
#define E1000_RX_DESC_EXT E1000_RX_DESC // XXX workaround
#define NM_E1K_RX_DESC_T struct e1000_rx_desc
#define NM_E1R_RX_STATUS status
#define NM_E1R_RX_BUFADDR buffer_addr
#define NM_E1R_RX_LENGTH length
#endif /* up to 3.2.x */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
#define NM_WR_TX_TAIL(_x) writel(_x, txr->tail) // XXX tx_ring
#define NM_WR_RX_TAIL(_x) writel(_x, rxr->tail) // XXX rx_ring
#define NM_RD_TX_HEAD() readl(txr->head)
#else
#define NM_WR_TX_TAIL(_x) writel(_x, adapter->hw.hw_addr + txr->tail)
#define NM_WR_RX_TAIL(_x) writel(_x, adapter->hw.hw_addr + rxr->tail)
#define NM_RD_TX_HEAD() readl(adapter->hw.hw_addr + txr->head)
#endif /* < 3.4.0 */
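/*
 * Intended use of the accessors above, as a sketch (the real call
 * sites are in the txsync/rxsync routines below; nic_i stands for
 * the usual NIC ring index):
 *
 *	NM_WR_TX_TAIL(nic_i);	  // publish new TX descriptors to the NIC
 *	nic_i = NM_RD_TX_HEAD();  // read back completed transmissions
 *	NM_WR_RX_TAIL(nic_i);	  // return RX buffers to the NIC
 *
 * On kernels >= 3.4 the ring structure carries the mapped head/tail
 * registers directly; on older kernels they are reached through
 * adapter->hw.hw_addr plus an offset.
 */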
/*
* Register/unregister. We are already under netmap lock.
*/
static int
e1000_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *adapter = netdev_priv(ifp);
/* protect against other reinit */
while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
usleep_range(1000, 2000);
rtnl_lock();
if (netif_running(adapter->netdev))
e1000e_down(adapter);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
if (netif_running(adapter->netdev))
e1000e_up(adapter);
else
e1000e_reset(adapter); // XXX is it needed ?
rtnl_unlock();
clear_bit(__E1000_RESETTING, &adapter->state);
return (0);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
e1000_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct SOFTC_T *adapter = netdev_priv(ifp);
struct e1000_ring* txr = &adapter->tx_ring[ring_nr];
rmb();
/*
* First part: process new packets to send.
*/
if (!netif_carrier_ok(ifp)) {
goto out;
}
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
struct e1000_tx_desc *curr = E1000_TX_DESC(*txr, nic_i);
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr)
curr->buffer_addr = htole64(paddr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->upper.data = 0;
curr->lower.data = htole32(adapter->txd_cmd | len | flags |
E1000_TXD_CMD_EOP);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
wmb(); /* synchronize writes to the NIC ring */
txr->next_to_use = nic_i;
NM_WR_TX_TAIL(nic_i);
mmiowb(); // XXX where do we need this ?
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = NM_RD_TX_HEAD(); // XXX could scan descriptors ?
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
txr->next_to_clean = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
out:
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
e1000_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct SOFTC_T *adapter = netdev_priv(ifp);
struct e1000_ring *rxr = &adapter->rx_ring[ring_nr];
if (!netif_carrier_ok(ifp))
return 0;
if (head > lim)
return netmap_ring_reinit(kring);
rmb();
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
int strip_crc = (adapter->flags2 & FLAG2_CRC_STRIPPING) ? 0 : 4;
nic_i = rxr->next_to_clean;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
NM_E1K_RX_DESC_T *curr = E1000_RX_DESC_EXT(*rxr, nic_i);
uint32_t staterr = le32toh(curr->NM_E1R_RX_STATUS);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->NM_E1R_RX_LENGTH) - strip_crc;
ring->slot[nm_i].flags = slot_flags;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_clean = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
NM_E1K_RX_DESC_T *curr = E1000_RX_DESC_EXT(*rxr, nic_i);
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
curr->NM_E1R_RX_BUFADDR = htole64(paddr); /* reload ext.desc. addr. */
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr)
slot->flags &= ~NS_BUF_CHANGED;
}
curr->NM_E1R_RX_STATUS = 0;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
rxr->next_to_use = nic_i; // XXX not really used
wmb();
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
NM_WR_RX_TAIL(nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/* diagnostic routine to catch errors */
static void e1000e_no_rx_alloc(struct SOFTC_T *a, int n)
{
D("e1000->alloc_rx_buf should not be called");
}
/*
* Make the tx and rx rings point to the netmap buffers.
*/
static int e1000e_netmap_init_buffers(struct SOFTC_T *adapter)
{
struct ifnet *ifp = adapter->netdev;
struct netmap_adapter* na = NA(ifp);
struct netmap_slot* slot;
struct e1000_ring *rxr = adapter->rx_ring;
struct e1000_ring *txr = adapter->tx_ring;
int i, si;
uint64_t paddr;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
slot = netmap_reset(na, NR_RX, 0, 0);
if (!slot)
return 0; // not in netmap mode XXX check is useless
adapter->alloc_rx_buf = (void*)e1000e_no_rx_alloc;
for (i = 0; i < rxr->count; i++) {
// XXX the skb check and cleanup can go away
struct e1000_buffer *bi = &rxr->buffer_info[i];
si = netmap_idx_n2k(&na->rx_rings[0], i);
PNMB(slot + si, &paddr);
if (bi->skb)
D("rx buf %d was set", i);
bi->skb = NULL; // XXX leak if set
// netmap_load_map(...)
E1000_RX_DESC_EXT(*rxr, i)->NM_E1R_RX_BUFADDR = htole64(paddr);
}
rxr->next_to_use = 0;
/* preserve buffers already made available to clients */
i = rxr->count - 1 - nm_kr_rxspace(&na->rx_rings[0]);
wmb(); /* Force memory writes to complete */
NM_WR_RX_TAIL(i);
/* now initialize the tx ring */
slot = netmap_reset(na, NR_TX, 0, 0);
for (i = 0; i < na->num_tx_desc; i++) {
si = netmap_idx_n2k(&na->tx_rings[0], i);
PNMB(slot + si, &paddr);
// netmap_load_map(...)
E1000_TX_DESC(*txr, i)->buffer_addr = htole64(paddr);
}
return 1;
}
static void
e1000_netmap_attach(struct SOFTC_T *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->netdev;
na.num_tx_desc = adapter->tx_ring->count;
na.num_rx_desc = adapter->rx_ring->count;
na.nm_register = e1000_netmap_reg;
na.nm_txsync = e1000_netmap_txsync;
na.nm_rxsync = e1000_netmap_rxsync;
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */


@@ -1,400 +0,0 @@
/*
* Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Id: if_igb_netmap.h 10878 2012-04-12 22:28:48Z luigi $
*
* netmap support for: igb (linux version)
* For details on netmap support please see ixgbe_netmap.h
*/
#include <bsd_glue.h>
#include <net/netmap.h>
#include <netmap/netmap_kern.h>
#define SOFTC_T igb_adapter
/*
* Adapt to different versions of the driver.
* E1000_TX_DESC_ADV etc. have dropped the _ADV suffix at some point.
* Also the first argument is now a pointer not the object.
*/
#ifndef E1000_TX_DESC_ADV
#define E1000_TX_DESC_ADV(_r, _i) IGB_TX_DESC(&(_r), _i)
#define E1000_RX_DESC_ADV(_r, _i) IGB_RX_DESC(&(_r), _i)
#define READ_TDH(_txr) ({struct e1000_hw *hw = &adapter->hw;rd32(E1000_TDH((_txr)->reg_idx));} )
#else /* up to 3.2, approximately */
#define igb_tx_buffer igb_buffer
#define tx_buffer_info buffer_info
#define igb_rx_buffer igb_buffer
#define rx_buffer_info buffer_info
#define READ_TDH(_txr) readl((_txr)->head)
#endif
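/*
 * Sketch of how the compatibility macros above are used (the actual
 * call sites are in txsync/rxsync below; nic_i is the NIC ring index):
 *
 *	union e1000_adv_tx_desc *txd = E1000_TX_DESC_ADV(*txr, nic_i);
 *	union e1000_adv_rx_desc *rxd = E1000_RX_DESC_ADV(*rxr, nic_i);
 *	nic_i = READ_TDH(txr);	// completed TX slots on any kernel
 *
 * On recent kernels the _ADV accessors no longer exist, so they are
 * recreated from IGB_TX_DESC()/IGB_RX_DESC() and TDH is read via
 * rd32(); on older kernels (up to roughly 3.2) the accessors are
 * native and TDH is a readl() of the mapped head pointer.
 */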
/*
* Register/unregister. We are already under netmap lock.
* Only called on the first register or the last unregister.
*/
static int
igb_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *adapter = netdev_priv(ifp);
/* protect against other reinit */
while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
usleep_range(1000, 2000);
rtnl_lock();
if (netif_running(adapter->netdev))
igb_down(adapter);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
if (netif_running(adapter->netdev))
igb_up(adapter);
else
igb_reset(adapter); // XXX is it needed ?
rtnl_unlock();
clear_bit(__IGB_RESETTING, &adapter->state);
return (0);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
igb_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct SOFTC_T *adapter = netdev_priv(ifp);
struct igb_ring* txr = adapter->tx_ring[ring_nr];
rmb(); // XXX not in ixgbe ?
/*
* First part: process new packets to send.
*/
if (!netif_carrier_ok(ifp)) {
goto out;
}
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
uint32_t olinfo_status=0;
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
union e1000_adv_tx_desc *curr =
E1000_TX_DESC_ADV(*txr, nic_i);
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->read.buffer_addr = htole64(paddr);
// XXX check olinfo and cmd_type_len
curr->read.olinfo_status =
htole32(olinfo_status |
(len<< E1000_ADVTXD_PAYLEN_SHIFT));
curr->read.cmd_type_len = htole32(len | flags |
E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
E1000_ADVTXD_DCMD_IFCS | E1000_TXD_CMD_EOP);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
wmb(); /* synchronize writes to the NIC ring */
txr->next_to_use = nic_i; /* XXX what for ? */
/* (re)start the tx unit up to slot nic_i (excluded) */
writel(nic_i, txr->tail);
mmiowb(); // XXX where do we need this ?
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = READ_TDH(txr);
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
txr->next_to_use = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
out:
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
igb_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct SOFTC_T *adapter = netdev_priv(ifp);
struct igb_ring *rxr = adapter->rx_ring[ring_nr];
if (!netif_carrier_ok(ifp))
return 0;
if (head > lim)
return netmap_ring_reinit(kring);
rmb();
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_clean;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
union e1000_adv_rx_desc *curr =
E1000_RX_DESC_ADV(*rxr, nic_i);
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
ring->slot[nm_i].flags = slot_flags;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_clean = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
union e1000_adv_rx_desc *curr = E1000_RX_DESC_ADV(*rxr, nic_i);
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
// netmap_reload_map(pdev, DMA_FROM_DEVICE, old_paddr, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->read.pkt_addr = htole64(paddr);
curr->read.hdr_addr = 0;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
wmb();
rxr->next_to_use = nic_i; // XXX not really used
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
writel(nic_i, rxr->tail);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
static int
igb_netmap_configure_tx_ring(struct SOFTC_T *adapter, int ring_nr)
{
struct ifnet *ifp = adapter->netdev;
struct netmap_adapter* na = NA(ifp);
struct netmap_slot* slot;
struct igb_ring *txr = adapter->tx_ring[ring_nr];
int i, si;
void *addr;
uint64_t paddr;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
slot = netmap_reset(na, NR_TX, ring_nr, 0);
if (!slot)
return 0; // XXX this should never happen
for (i = 0; i < na->num_tx_desc; i++) {
union e1000_adv_tx_desc *tx_desc;
si = netmap_idx_n2k(&na->tx_rings[ring_nr], i);
addr = PNMB(slot + si, &paddr);
tx_desc = E1000_TX_DESC_ADV(*txr, i);
tx_desc->read.buffer_addr = htole64(paddr);
/* actually we don't care to init the rings here */
}
return 1; // success
}
static int
igb_netmap_configure_rx_ring(struct igb_ring *rxr)
{
struct ifnet *ifp = rxr->netdev;
struct netmap_adapter* na = NA(ifp);
int reg_idx = rxr->reg_idx;
struct netmap_slot* slot;
u_int i;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
/*
* XXX watch out, the main driver must not use
* split headers. The buffer len should be written
* into wr32(E1000_SRRCTL(reg_idx), srrctl) with options
* something like
* srrctl = ALIGN(buffer_len, 1024) >>
* E1000_SRRCTL_BSIZEPKT_SHIFT;
* srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
* srrctl |= E1000_SRRCTL_DROP_EN;
*/
slot = netmap_reset(na, NR_RX, reg_idx, 0);
if (!slot)
return 0; // not in netmap mode
for (i = 0; i < rxr->count; i++) {
union e1000_adv_rx_desc *rx_desc;
uint64_t paddr;
int si = netmap_idx_n2k(&na->rx_rings[reg_idx], i);
#if 0
// XXX the skb check can go away
struct igb_rx_buffer *bi = &rxr->rx_buffer_info[i];
if (bi->skb)
D("rx buf %d was set", i);
bi->skb = NULL; // XXX leak if set
#endif /* useless */
PNMB(slot + si, &paddr);
rx_desc = E1000_RX_DESC_ADV(*rxr, i);
rx_desc->read.hdr_addr = 0;
rx_desc->read.pkt_addr = htole64(paddr);
}
rxr->next_to_use = 0;
/* preserve buffers already made available to clients */
i = rxr->count - 1 - nm_kr_rxspace(&na->rx_rings[reg_idx]);
wmb(); /* Force memory writes to complete */
ND("%s rxr%d.tail %d", ifp->if_xname, reg_idx, i);
writel(i, rxr->tail);
return 1; // success
}
static void
igb_netmap_attach(struct SOFTC_T *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->netdev;
na.num_tx_desc = adapter->tx_ring_count;
na.num_rx_desc = adapter->rx_ring_count;
na.nm_register = igb_netmap_reg;
na.nm_txsync = igb_netmap_txsync;
na.nm_rxsync = igb_netmap_rxsync;
na.num_tx_rings = adapter->num_tx_queues;
na.num_rx_rings = adapter->num_rx_queues;
netmap_attach(&na);
}
/* end of file */


@@ -1,350 +0,0 @@
/*
* Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Id: if_re_netmap_linux.h 10679 2012-02-28 13:42:18Z luigi $
*
* netmap support for: r8169 (re, linux version)
* For details on netmap support please see ixgbe_netmap.h
* 1 tx ring, 1 rx ring, 1 lock, crcstrip ? reinit tx addr,
*/
#include <bsd_glue.h>
#include <net/netmap.h>
#include <netmap/netmap_kern.h>
static void rtl8169_wait_for_quiescence(struct ifnet *);
#define SOFTC_T rtl8169_private
/*
* Register/unregister, mostly the reinit task
*/
static int
re_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
int error = 0;
rtnl_lock();
rtl8169_wait_for_quiescence(ifp);
rtl8169_close(ifp);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
if (rtl8169_open(ifp) < 0) {
error = ENOMEM;
goto fail;
}
} else {
fail:
nm_clear_native_flags(na);
error = rtl8169_open(ifp) ? EINVAL : 0;
}
rtnl_unlock();
return (error);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
re_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* device-specific */
struct SOFTC_T *sc = netdev_priv(ifp);
void __iomem *ioaddr = sc->mmio_addr;
rmb();
/*
* First part: process new packets to send.
*/
if (!netif_carrier_ok(ifp)) {
goto out;
}
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = sc->cur_tx; // XXX use internal macro ?
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
struct TxDesc *curr = &sc->TxDescArray[nic_i];
uint32_t flags = slot->len | LastFrag | DescOwn | FirstFrag ;
NM_CHECK_ADDR_LEN(addr, len);
if (nic_i == lim) /* mark end of ring */
flags |= RingEnd;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr);
curr->addr = htole64(paddr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
curr->opts1 = htole32(flags);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
sc->cur_tx = nic_i;
wmb(); /* synchronize writes to the NIC ring */
RTL_W8(TxPoll, NPQ); /* start ? */
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
for (n = 0, nic_i = sc->dirty_tx; nic_i != sc->cur_tx; n++) {
if (le32toh(sc->TxDescArray[nic_i].opts1) & DescOwn)
break;
if (++nic_i == NUM_TX_DESC)
nic_i = 0;
}
if (n > 0) {
sc->dirty_tx = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
out:
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
re_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct SOFTC_T *sc = netdev_priv(ifp);
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
if (!netif_carrier_ok(ifp))
return 0;
if (head > lim)
return netmap_ring_reinit(kring);
rmb();
/*
* First part: import newly received packets.
*
* NOTE: This device uses all the buffers in the ring, so we
* need another termination condition in addition to DescOwn
* cleared (all buffers could have it cleared). The easiest one
* is to stop right before nr_hwcur.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
uint32_t stop_i = nm_prev(kring->nr_hwcur, lim);
nic_i = sc->cur_rx; /* next pkt to check */
nm_i = netmap_idx_n2k(kring, nic_i);
while (nm_i != stop_i) {
struct RxDesc *cur_rx = &sc->RxDescArray[nic_i];
uint32_t rxstat = le32toh(cur_rx->opts1);
uint32_t total_len;
if ((rxstat & DescOwn) != 0)
break;
total_len = rxstat & 0x00001FFF;
/* XXX subtract crc */
total_len = (total_len < 4) ? 0 : total_len - 4;
ring->slot[nm_i].len = total_len;
ring->slot[nm_i].flags = slot_flags;
// ifp->stats.rx_packets++;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
sc->cur_rx = nic_i;
kring->nr_hwtail = nm_i;
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
struct RxDesc *curr = &sc->RxDescArray[nic_i];
uint32_t flags = NETMAP_BUF_SIZE | DescOwn;
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (nic_i == lim) /* mark end of ring */
flags |= RingEnd;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_paddr, addr);
curr->addr = htole64(paddr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->opts1 = htole32(flags);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
wmb(); // XXX needed ?
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/*
* Additional routines to init the tx and rx rings.
* In other drivers we do that inline in the main code.
*/
static int
re_netmap_tx_init(struct SOFTC_T *sc)
{
struct netmap_adapter *na = NA(sc->dev);
struct netmap_slot *slot;
struct TxDesc *desc = sc->TxDescArray;
int i, l;
uint64_t paddr;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
slot = netmap_reset(na, NR_TX, 0, 0);
/* slot is NULL if we are not in netmap mode XXX cannot happen */
if (!slot)
return 0;
/* l points in the netmap ring, i points in the NIC ring */
for (i = 0; i < na->num_tx_desc; i++) {
l = netmap_idx_n2k(&na->tx_rings[0], i);
PNMB(slot + l, &paddr);
desc[i].addr = htole64(paddr);
}
return 1;
}
static int
re_netmap_rx_init(struct SOFTC_T *sc)
{
struct netmap_adapter *na = NA(sc->dev);
struct netmap_slot *slot;
struct RxDesc *desc = sc->RxDescArray;
uint32_t cmdstat;
int i, lim, l;
uint64_t paddr;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
slot = netmap_reset(na, NR_RX, 0, 0);
if (!slot)
return 0; /* XXX cannot happen */
/*
* Do not release the slots owned by userspace
* XXX we use all slots, so no '-1' here
* XXX do we need -1 instead ?
*/
lim = na->num_rx_desc /* - 1 */ - nm_kr_rxspace(&na->rx_rings[0]);
for (i = 0; i < na->num_rx_desc; i++) {
l = netmap_idx_n2k(&na->rx_rings[0], i);
PNMB(slot + l, &paddr);
cmdstat = NETMAP_BUF_SIZE;
if (i == na->num_rx_desc - 1)
cmdstat |= RingEnd;
if (i < lim)
cmdstat |= DescOwn;
desc[i].opts1 = htole32(cmdstat);
desc[i].addr = htole64(paddr);
}
return 1;
}
static void
re_netmap_attach(struct SOFTC_T *sc)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = sc->dev;
na.num_tx_desc = NUM_TX_DESC;
na.num_rx_desc = NUM_RX_DESC;
na.nm_txsync = re_netmap_txsync;
na.nm_rxsync = re_netmap_rxsync;
na.nm_register = re_netmap_reg;
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */

View File

@ -1,503 +0,0 @@
/*
* Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/dev/netmap/ixgbe_netmap.h 230572 2012-01-26 09:55:16Z luigi $
*
* netmap support for: ixgbe (LINUX version)
*
* This file is meant to be a reference on how to implement
* netmap support for a network driver.
* This file contains code but only static or inline functions used
* by a single driver. To avoid replication of code we just #include
* it near the beginning of the standard driver.
*/
#include <bsd_glue.h>
#include <net/netmap.h>
#include <netmap/netmap_kern.h>
#define SOFTC_T ixgbe_adapter
/*
* Adaptation to different versions of the driver.
* Recent drivers (3.4 and above) redefine some macros
*/
#ifndef IXGBE_TX_DESC_ADV
#define IXGBE_TX_DESC_ADV IXGBE_TX_DESC
#define IXGBE_RX_DESC_ADV IXGBE_RX_DESC
#endif
/*
* Register/unregister. We are already under netmap lock.
* Only called on the first register or the last unregister.
*/
static int
ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *adapter = netdev_priv(ifp);
// adapter->netdev->trans_start = jiffies; // disable watchdog ?
/* protect against other reinit */
while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
usleep_range(1000, 2000);
rtnl_lock();
if (netif_running(adapter->netdev))
ixgbe_down(adapter);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
/* XXX SRIOV might need another 2sec wait */
if (netif_running(adapter->netdev))
ixgbe_up(adapter); /* also enables intr */
rtnl_unlock();
clear_bit(__IXGBE_RESETTING, &adapter->state);
return (0);
}
/*
* Reconcile kernel and user view of the transmit ring.
*
* Userspace wants to send packets up to the one before ring->head,
* kernel knows kring->nr_hwcur is the first unsent packet.
*
* Here we push packets out (as many as possible), and possibly
* reclaim buffers from previously completed transmission.
*
* ring->tail is updated on return.
* ring->head is never used here.
*
* The caller (netmap) guarantees that there is only one instance
* running at any time. Any interference with other driver
* methods should be handled by the individual drivers.
*/
static int
ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/*
* interrupts on every tx packet are expensive so request
* them every half ring, or where NS_REPORT is set
*/
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct SOFTC_T *adapter = netdev_priv(ifp);
struct ixgbe_ring *txr = adapter->tx_ring[ring_nr];
int reclaim_tx;
/*
* First part: process new packets to send.
* nm_i is the current index in the netmap ring,
* nic_i is the corresponding index in the NIC ring.
* The two numbers differ because upon a *_init() we reset
* the NIC ring but leave the netmap ring unchanged.
* For the transmit ring, we have
*
* nm_i = kring->nr_hwcur
* nic_i = IXGBE_TDT (not tracked in the driver)
* and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
* In this driver kring->nkr_hwofs >= 0, but for other
* drivers it might be negative as well.
*/
/*
* If we have packets to send (kring->nr_hwcur != ring->cur)
* iterate over the netmap ring, fetch length and update
* the corresponding slot in the NIC ring. Some drivers also
* need to update the buffer's physical address in the NIC slot
* even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
*
* The netmap_reload_map() call is especially expensive,
* even when (as in this case) the tag is 0, so only do it
* when the buffer has actually changed.
*
* If possible do not set the report/intr bit on all slots,
* but only a few times per ring or when NS_REPORT is set.
*
* Finally, on 10G and faster drivers, it might be useful
* to prefetch the next slot and txr entry.
*/
if (!netif_carrier_ok(ifp)) {
goto out;
}
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
union ixgbe_adv_tx_desc *curr = IXGBE_TX_DESC_ADV(txr, nic_i);
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
IXGBE_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->read.buffer_addr = htole64(paddr);
curr->read.olinfo_status = htole32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
curr->read.cmd_type_len = htole32(len | flags |
IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT |
IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
wmb(); /* synchronize writes to the NIC ring */
/* (re)start the tx unit up to slot nic_i (excluded) */
IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->reg_idx), nic_i);
}
/*
* Second part: reclaim buffers for completed transmissions.
* Because this is expensive (we read a NIC register etc.)
* we only do it in specific cases (see below).
*/
if (flags & NAF_FORCE_RECLAIM) {
reclaim_tx = 1; /* forced reclaim */
} else if (!nm_kr_txempty(kring)) {
reclaim_tx = 0; /* have buffers, no reclaim */
} else {
/*
* No buffers available. Locate previous slot with
* REPORT_STATUS set.
* If the slot has DD set, we can reclaim space,
* otherwise wait for the next interrupt.
* This enables interrupt moderation on the tx
* side though it might reduce throughput.
*/
union ixgbe_adv_tx_desc *txd = IXGBE_TX_DESC_ADV(txr, 0);
nic_i = txr->next_to_clean + report_frequency;
if (nic_i > lim)
nic_i -= lim + 1;
// round to the closest slot with DD set
nic_i = (nic_i < kring->nkr_num_slots / 4 ||
nic_i >= kring->nkr_num_slots*3/4) ?
0 : report_frequency;
reclaim_tx = txd[nic_i].wb.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ?
}
if (reclaim_tx) {
/*
* Record completed transmissions.
* We (re)use the driver's txr->next_to_clean to keep
* track of the most recently completed transmission.
*
* The datasheet discourages the use of TDH to find
* out the number of sent packets, but we only set
* REPORT STATUS in a few slots so TDH is the only
* good way.
*/
nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
txr->next_to_clean = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
out:
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
* Same as for the txsync, this routine must be efficient.
* The caller guarantees a single invocation, but races against
* the rest of the driver should be handled here.
*
* When called, userspace has released buffers up to ring->head
* (last one excluded).
*
* If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
* of whether or not we received an interrupt.
*/
static int
ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct SOFTC_T *adapter = netdev_priv(ifp);
struct ixgbe_ring *rxr = adapter->rx_ring[ring_nr];
if (!netif_carrier_ok(ifp))
return 0;
if (head > lim)
return netmap_ring_reinit(kring);
rmb();
/*
* First part: import newly received packets.
*
* nm_i is the index of the next free slot in the netmap ring,
* nic_i is the index of the next received packet in the NIC ring,
* and they may differ in case if_init() has been called while
* in netmap mode. For the receive ring we have
*
* nm_i = (kring->nr_hwtail)
* nic_i = rxr->next_to_clean; // really next to check
* and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
* rxr->next_to_clean is set to 0 on a ring reinit
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_clean;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
union ixgbe_adv_rx_desc *curr = IXGBE_RX_DESC_ADV(rxr, nic_i);
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & IXGBE_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
ring->slot[nm_i].flags = slot_flags;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_clean = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
* (kring->nr_hwcur to ring->head excluded),
* and make the buffers available for reception.
* As usual nm_i is the index in the netmap ring,
* nic_i is the index in the NIC ring, and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
union ixgbe_adv_rx_desc *curr = IXGBE_RX_DESC_ADV(rxr, nic_i);
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->wb.upper.status_error = 0;
curr->read.pkt_addr = htole64(paddr);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
rxr->next_to_use = nic_i; // XXX not really used
wmb();
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->reg_idx), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/*
* if in netmap mode, attach the netmap buffers to the ring and return true.
* Otherwise return false.
*/
static int
ixgbe_netmap_configure_tx_ring(struct SOFTC_T *adapter, int ring_nr)
{
struct netmap_adapter *na = NA(adapter->netdev);
struct netmap_slot *slot;
//int j;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
slot = netmap_reset(na, NR_TX, ring_nr, 0);
if (!slot)
return 0; // not in netmap; XXX cannot happen
#if 0
/*
* on a generic card we should set the address in the slot.
* But on the ixgbe, the address needs to be rewritten
* after a transmission so there is nothing to do except
* loading the map.
*/
for (j = 0; j < na->num_tx_desc; j++) {
int sj = netmap_idx_n2k(&na->tx_rings[ring_nr], j);
uint64_t paddr;
void *addr = PNMB(slot + sj, &paddr);
}
#endif
return 1;
}
static int
ixgbe_netmap_configure_rx_ring(struct SOFTC_T *adapter, int ring_nr)
{
/*
* In netmap mode, we must preserve the buffers made
* available to userspace before the if_init()
* (this is true by default on the TX side, because
* init makes all buffers available to userspace).
*
* netmap_reset() and the device-specific routines
* (e.g. ixgbe_setup_receive_rings()) map these
* buffers at the end of the NIC ring, so here we
* must set the RDT (tail) register to make sure
* they are not overwritten.
*
* In this driver the NIC ring starts at RDH = 0,
* RDT points to the last slot available for reception (?),
* so RDT = num_rx_desc - 1 means the whole ring is available.
*/
struct netmap_adapter *na = NA(adapter->netdev);
struct netmap_slot *slot;
int lim, i;
struct ixgbe_ring *ring = adapter->rx_ring[ring_nr];
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
slot = netmap_reset(na, NR_RX, ring_nr, 0);
/* same as in ixgbe_setup_transmit_ring() */
if (!slot)
return 0; // not in netmap; XXX cannot happen
lim = na->num_rx_desc - 1 - nm_kr_rxspace(&na->rx_rings[ring_nr]);
for (i = 0; i < na->num_rx_desc; i++) {
/*
* Fill the map and set the buffer address in the NIC ring,
* considering the offset between the netmap and NIC rings
* (see comment in ixgbe_setup_transmit_ring() ).
*/
int si = netmap_idx_n2k(&na->rx_rings[ring_nr], i);
uint64_t paddr;
PNMB(slot + si, &paddr);
// netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
/* Update descriptor */
IXGBE_RX_DESC_ADV(ring, i)->read.pkt_addr = htole64(paddr);
}
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(ring_nr), lim);
return 1;
}
/*
* The attach routine, called near the end of ixgbe_attach(),
* fills the parameters for netmap_attach() and calls it.
* It cannot fail, in the worst case (such as no memory)
* netmap mode will be disabled and the driver will only
* operate in standard mode.
*/
static void
ixgbe_netmap_attach(struct SOFTC_T *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->netdev;
na.num_tx_desc = adapter->tx_ring[0]->count;
na.num_rx_desc = adapter->rx_ring[0]->count;
na.nm_txsync = ixgbe_netmap_txsync;
na.nm_rxsync = ixgbe_netmap_rxsync;
na.nm_register = ixgbe_netmap_reg;
na.num_tx_rings = adapter->num_tx_queues;
na.num_rx_rings = adapter->num_rx_queues;
netmap_attach(&na);
}
/* end of file */

View File

@ -1,736 +0,0 @@
/*
* Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Id: mlx4_netmap_linux.h $
*
* netmap support for mlx4 (LINUX version)
*
*/
#include <bsd_glue.h>
#include <net/netmap.h>
#include <netmap/netmap_kern.h>
#define SOFTC_T mlx4_en_priv
/*
* This driver is split in multiple small files.
* The main device descriptor has type struct mlx4_en_priv *priv;
* and we attach to the device in mlx4_en_init_netdev()
* (do port numbers start from 1 ?)
*
* The reconfig routine is in mlx4_en_start_port() (also here)
* which is called on a mlx4_en_restart() (watchdog), open and set-mtu.
*
* priv->num_frags ??
* DS_SIZE ??
* apparently each rx desc is followed by frag.descriptors
* and the rx desc is rounded up to a power of 2.
*
* Receive code is in en_rx.c
* priv->rx_ring_num number of rx rings
* rxr = prov->rx_ring[ring_ind] rx ring descriptor
* rxr->size number of slots
* rxr->prod producer
* probably written into a mmio reg at *rxr->wqres.db.db
* trimmed to 16 bits.
*
* Rx init routine:
* mlx4_en_activate_rx_rings()
* mlx4_en_init_rx_desc()
* Transmit code is in en_tx.c
*/
int mlx4_netmap_rx_config(struct SOFTC_T *priv, int ring_nr);
int mlx4_netmap_tx_config(struct SOFTC_T *priv, int ring_nr);
int mlx4_tx_desc_dump(struct mlx4_en_tx_desc *tx_desc);
#ifdef NETMAP_MLX4_MAIN
static inline void
nm_pkt_dump(int i, char *buf, int len)
{
uint8_t *s __attribute__((unused)) = buf+6, *d __attribute__((unused)) = buf;
RD(10, "%d len %4d %02x:%02x:%02x:%02x:%02x:%02x -> %02x:%02x:%02x:%02x:%02x:%02x",
i,
len,
s[0], s[1], s[2], s[3], s[4], s[5],
d[0], d[1], d[2], d[3], d[4], d[5]);
}
/* show the content of the descriptor. Only the first block is printed
* to make sure we do not fail on wraparounds (otherwise we would need
* base, index and ring size).
*/
int
mlx4_tx_desc_dump(struct mlx4_en_tx_desc *tx_desc)
{
struct mlx4_wqe_ctrl_seg *ctrl = &tx_desc->ctrl;
uint32_t *p = (uint32_t *)tx_desc;
int i, l = ctrl->fence_size;
RD(5,"------- txdesc %p size 0x%x", tx_desc, ctrl->fence_size);
if (l > 4)
l = 4;
for (i = 0; i < l; i++) {
RD(20, "[%2d]: 0x%08x 0x%08x 0x%08x 0x%08x", i,
ntohl(p[0]), ntohl(p[1]), ntohl(p[2]), ntohl(p[3]));
p += 4;
}
return 0;
}
/*
* Register/unregister. We are already under (netmap) core lock.
* Only called on the first register or the last unregister.
*/
static int
mlx4_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *priv = netdev_priv(ifp);
int error = 0, need_load = 0;
struct mlx4_en_dev *mdev = priv->mdev;
/*
* On enable, flush pending ops, set flag and reinit rings.
* On disable, flush again, and restart the interface.
*/
D("setting netmap mode for %s to %s", ifp->if_xname, onoff ? "ON" : "OFF");
// rtnl_lock(); // ???
if (netif_running(ifp)) {
D("unloading %s", ifp->if_xname);
//double_mutex_state_lock(mdev);
mutex_lock(&mdev->state_lock);
if (onoff == 0) {
int i;
/* coming from netmap mode, clean up the ring pointers
* so we do not crash in mlx4_en_free_tx_buf()
* XXX should STAMP the txdesc value to pretend the hw got there
* 0x7fffffff plus the bit set to
* !!(ring->cons & ring->size)
*/
for (i = 0; i < na->num_tx_rings; i++) {
struct mlx4_en_tx_ring *txr = &priv->tx_ring[i];
ND("txr %d : cons %d prod %d txbb %d", i, txr->cons, txr->prod, txr->last_nr_txbb);
txr->cons += txr->last_nr_txbb; // XXX should be 1
for (;txr->cons != txr->prod; txr->cons++) {
uint16_t j = txr->cons & txr->size_mask;
uint32_t new_val, *ptr = (uint32_t *)(txr->buf + j * TXBB_SIZE);
new_val = cpu_to_be32(STAMP_VAL | (!!(txr->cons & txr->size) << STAMP_SHIFT));
ND(10, "old 0x%08x new 0x%08x", *ptr, new_val);
*ptr = new_val;
}
}
}
mlx4_en_stop_port(ifp);
need_load = 1;
}
retry:
if (onoff) { /* enable netmap mode */
nm_set_native_flags(na);
} else { /* reset normal mode */
nm_clear_native_flags(na);
}
if (need_load) {
D("loading %s", ifp->if_xname);
error = mlx4_en_start_port(ifp);
D("start_port returns %d", error);
if (error && onoff) {
onoff = 0;
goto retry;
}
mutex_unlock(&mdev->state_lock);
//double_mutex_state_unlock(mdev);
}
// rtnl_unlock();
return (error);
}
/*
* Reconcile kernel and user view of the transmit ring.
* This routine might be called frequently so it must be efficient.
*
OUTGOING (txr->prod)
Tx packets need to fill a 64-byte block with one control block and
one descriptor (both 16-byte). Probably we need to fill the other
two data entries in the block with NULL entries as done in rx_config().
One can request completion reports (intr) on all entries or only
on selected ones. The std. driver reports every 16 packets.
txr->prod points to the first available slot to send.
COMPLETION (txr->cons)
TX events are reported through a Completion Queue (CQ) whose entries
can be 32 or 64 bytes. In case of 64 bytes, the interesting part is
at odd indexes. The "factor" variable does the addressing.
txr->cons points to the last completed block (XXX note so it is 1 behind)
There is no link back from the txring to the completion
queue so we need to track it ourselves. HOWEVER mlx4_en_alloc_resources()
uses the same index for cq and ring so tx_cq and tx_ring correspond,
same for rx_cq and rx_ring.
*/
static int
mlx4_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/*
* interrupts on every tx packet are expensive so request
* them every half ring, or where NS_REPORT is set
*/
u_int report_frequency = kring->nkr_num_slots >> 1;
struct SOFTC_T *priv = netdev_priv(ifp);
struct mlx4_en_tx_ring *txr = &priv->tx_ring[ring_nr];
int error = 0;
if (!netif_carrier_ok(ifp)) {
goto out;
}
// XXX debugging, only print if sending something
n = (txr->prod - txr->cons - 1) & 0xffffff; // should be modulo 2^24 ?
if (n >= txr->size) {
RD(5, "XXXXXXXXXXX txr %d overflow: cons %u prod %u size %d delta %d",
ring_nr, txr->cons, txr->prod, txr->size, n);
}
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
// XXX debugging, assuming lim is 2^x-1
n = 0; // XXX debugging
if (nm_i != head) { /* we have new packets to send */
ND(5,"START: txr %u cons %u prod %u hwcur %u head %u tail %d send %d",
ring_nr, txr->cons, txr->prod, kring->nr_hwcur, ring->head, kring->nr_hwtail,
(head - nm_i) & lim);
// XXX see en_tx.c :: mlx4_en_xmit()
/*
* In netmap the descriptor has one control segment
* and one data segment. The control segment is 16 bytes,
* the data segment is another 16 bytes mlx4_wqe_data_seg.
* The alignment is TXBB_SIZE (64 bytes) though, so we are
* forced to use 64 bytes each.
*/
ND(10,"=======>========== send from %d to %d at bd %d", j, k, txr->prod);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
uint32_t l = txr->prod & txr->size_mask;
struct mlx4_en_tx_desc *tx_desc = txr->buf + l * TXBB_SIZE;
struct mlx4_wqe_ctrl_seg *ctrl = &tx_desc->ctrl;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, unload and reload map */
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/*
* Fill the slot in the NIC ring.
*/
ctrl->vlan_tag = 0; // not used
ctrl->ins_vlan = 0; // NO
ctrl->fence_size = 2; // used descriptor size in 16byte blocks
// request notification. XXX later report only if NS_REPORT or not too often.
ctrl->srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
MLX4_WQE_CTRL_SOLICITED);
// XXX do we need to copy the mac dst address ?
if (1) { // XXX do we need this ?
uint64_t mac = mlx4_en_mac_to_u64(addr);
uint32_t mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16);
uint32_t mac_l = (u32) (mac & 0xffffffff);
ctrl->srcrb_flags |= cpu_to_be32(mac_h);
ctrl->imm = cpu_to_be32(mac_l);
}
tx_desc->data.addr = cpu_to_be64(paddr);
tx_desc->data.lkey = cpu_to_be32(priv->mdev->mr.key);
wmb(); // XXX why here ?
tx_desc->data.byte_count = cpu_to_be32(len); // XXX crc corrupt ?
wmb();
ctrl->owner_opcode = cpu_to_be32(
MLX4_OPCODE_SEND |
((txr->prod & txr->size) ? MLX4_EN_BIT_DESC_OWN : 0) );
txr->prod++;
nm_i = nm_next(nm_i, lim);
}
kring->nr_hwcur = head;
/* XXX Check how to deal with nkr_hwofs */
/* these two are always in sync. */
wmb(); /* synchronize writes to the NIC ring */
/* (re)start the transmitter up to slot l (excluded) */
ND(5, "doorbell cid %d data 0x%x", txdata->cid, txdata->tx_db.raw);
// XXX is this doorbell correct ?
iowrite32be(txr->doorbell_qpn, txr->bf.uar->map + MLX4_SEND_DOORBELL);
}
// XXX debugging, only print if sent something
if (n)
ND(5, "SENT: txr %d cons %u prod %u hwcur %u cur %u tail %d sent %d",
ring_nr, txr->cons, txr->prod, kring->nr_hwcur, ring->cur, kring->nr_hwtail, n);
/*
* Second part: reclaim buffers for completed transmissions.
*/
{
struct mlx4_en_cq *cq = &priv->tx_cq[ring_nr];
struct mlx4_cq *mcq = &cq->mcq;
int size = cq->size; // number of entries
struct mlx4_cqe *buf = cq->buf; // base of cq entries
uint32_t size_mask = txr->size_mask; // same in txq and cq ?
uint16_t new_index, ring_index;
int factor = priv->cqe_factor; // 1 for 64 bytes, 0 for 32 bytes
/*
* Reclaim buffers for completed transmissions. The CQE tells us
* where the consumer (NIC) is. Bit 7 of the owner_sr_opcode
* is the ownership bit. It toggles up and down so the
* non-bitwise XNOR trick lets us detect toggles as the ring
* wraps around. On even rounds, the second operand is 0 so
* we exit when the MLX4_CQE_OWNER_MASK bit is 1, vice versa
* on odd rounds.
*/
new_index = ring_index = txr->cons & size_mask;
for (n = 0; n < 2*lim; n++) {
uint16_t index = mcq->cons_index & size_mask;
struct mlx4_cqe *cqe = &buf[(index << factor) + factor];
if (!XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
mcq->cons_index & size))
break;
/*
* make sure we read the CQE after we read the
* ownership bit
*/
rmb();
/* Skip over last polled CQE */
new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
ND(5, "txq %d new_index %d", ring_nr, new_index);
mcq->cons_index++;
}
if (n > lim) {
D("XXXXXXXXXXX too many notifications %d", n);
}
/* now we have updated cons-index, notify the card. */
/* XXX can we make it conditional ? */
wmb();
mlx4_cq_set_ci(mcq);
// XXX the following enables interrupts... */
// mlx4_en_arm_cq(priv, cq); // XXX always ?
wmb();
/* XXX unsigned arithmetic below */
n = (new_index - ring_index) & size_mask;
if (n) {
ND(5, "txr %d completed %d packets", ring_nr, n);
txr->cons += n;
/* XXX watch out, index is probably modulo */
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, (new_index & size_mask)), lim);
}
if (nm_kr_txempty(kring)) {
mlx4_en_arm_cq(priv, cq);
}
}
out:
nm_txsync_finalize(kring);
return 0;
err:
if (error)
return netmap_ring_reinit(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
MELLANOX:
the ring has prod and cons indexes, the size is a power of 2,
size and actual_size indicate how many entries can be allocated,
stride is the size of each entry.
mlx4_en_update_rx_prod_db() tells the NIC where it can go
(to be used when new buffers are freed).
*/
static int
mlx4_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
struct SOFTC_T *priv = netdev_priv(ifp);
struct mlx4_en_rx_ring *rxr = &priv->rx_ring[ring_nr];
if (!priv->port_up) // XXX as in mlx4_en_process_rx_cq()
return 0;
if (!netif_carrier_ok(ifp)) // XXX maybe above is redundant ?
return 0;
if (head > lim)
return netmap_ring_reinit(kring);
ND(5, "START rxr %d cons %d prod %d kcur %d ktail %d cur %d tail %d",
ring_nr, rxr->cons, rxr->prod, kring->nr_hwcur, kring->nr_hwtail, ring->cur, ring->tail);
/*
* First part, import newly received packets.
*/
/* scan the completion queue to see what is going on.
* The mapping is 1:1. The hardware toggles the OWNER bit in the
* descriptor at mcq->cons_index & size_mask, which is mapped 1:1
* to an entry in the RXR.
* XXX there are two notifications sent to the hw:
* mlx4_cq_set_ci(struct mlx4_cq *cq);
* *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff);
* mlx4_en_update_rx_prod_db(rxr);
* *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
* apparently they point to the same memory word
* (see mlx4_en_activate_cq() ) and are initialized to 0
* DB is the doorbell page (sec.15.1.2 ?)
* wqres is set in mlx4_alloc_hwq_res()
* and in turn mlx4_alloc_hwq_res()
*/
if (1 || netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
struct mlx4_en_cq *cq = &priv->rx_cq[ring_nr];
struct mlx4_cq *mcq = &cq->mcq;
int factor = priv->cqe_factor;
uint32_t size_mask = rxr->size_mask;
int size = cq->size;
struct mlx4_cqe *buf = cq->buf;
nm_i = kring->nr_hwtail;
/* Process all completed CQEs, use same logic as in TX */
for (n = 0; n <= 2*lim ; n++) {
int index = mcq->cons_index & size_mask;
struct mlx4_cqe *cqe = &buf[(index << factor) + factor];
prefetch(cqe+1);
if (!XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, mcq->cons_index & size))
break;
rmb(); /* make sure data is up to date */
ring->slot[nm_i].len = be32_to_cpu(cqe->byte_cnt) - rxr->fcs_del;
ring->slot[nm_i].flags = slot_flags;
mcq->cons_index++;
nm_i = nm_next(nm_i, lim);
}
if (n) { /* update the state variables */
if (n >= 2*lim)
D("XXXXXXXXXXXXX too many received packets %d", n);
ND(5, "received %d packets", n);
kring->nr_hwtail = nm_i;
rxr->cons += n;
ND(5, "RECVD %d rxr %d cons %d prod %d kcur %d ktail %d cur %d tail %d",
n,
ring_nr, rxr->cons, rxr->prod, kring->nr_hwcur, kring->nr_hwtail, ring->cur, ring->tail);
/* XXX ack completion queue */
mlx4_cq_set_ci(mcq);
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur; /* netmap ring index */
if (nm_i != head) { /* userspace has released some packets. */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
/* collect per-slot info, with similar validations */
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
struct mlx4_en_rx_desc *rx_desc = rxr->buf + (nic_i * rxr->stride);
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
// netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
/* XXX
* The rx descriptor only contains buffer descriptors,
* probably only the length is changed or not even that one.
*/
// see mlx4_en_prepare_rx_desc() and mlx4_en_alloc_frag()
rx_desc->data[0].addr = cpu_to_be64(paddr);
rx_desc->data[0].byte_count = cpu_to_be32(NETMAP_BUF_SIZE);
rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key);
#if 0
int jj, possible_frags;
/* we only use one fragment, so the rest is padding */
possible_frags = (rxr->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
for (jj = 1; jj < possible_frags; jj++) {
rx_desc->data[jj].byte_count = 0;
rx_desc->data[jj].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
rx_desc->data[jj].addr = 0;
}
#endif
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
/* XXX note that mcq->cons_index and ring->cons are not in sync */
wmb();
rxr->prod += n;
kring->nr_hwcur = head;
/* and now tell the system that there are more buffers available.
* should use mlx4_en_update_rx_prod_db(rxr) but it is static in
* en_rx.c so we do not see it here
*/
*rxr->wqres.db.db = cpu_to_be32(rxr->prod & 0xffff);
ND(5, "FREED rxr %d cons %d prod %d kcur %d ktail %d",
ring_nr, rxr->cons, rxr->prod,
kring->nr_hwcur, kring->nr_hwtail);
}
/* tell userspace that there are new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/*
* If in netmap mode, attach the netmap buffers to the ring and return true.
* Otherwise return false.
* Called at the end of mlx4_en_start_port().
* XXX TODO: still incomplete.
*/
int
mlx4_netmap_tx_config(struct SOFTC_T *priv, int ring_nr)
{
struct netmap_adapter *na = NA(priv->dev);
struct netmap_slot *slot;
struct mlx4_en_cq *cq;
ND(5, "priv %p ring_nr %d", priv, ring_nr);
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
/*
CONFIGURE TX RINGS IN NETMAP MODE
little if anything to do
The main code does
mlx4_en_activate_cq()
mlx4_en_activate_tx_ring()
<Set initial ownership of all Tx TXBBs to SW (1)>
*/
slot = netmap_reset(na, NR_TX, ring_nr, 0);
if (!slot)
return 0; // not in netmap mode;
ND(5, "init tx ring %d with %d slots (driver %d)", ring_nr,
na->num_tx_desc,
priv->tx_ring[ring_nr].size);
/* enable interrupts on the netmap queues */
cq = &priv->tx_cq[ring_nr]; // derive from the txring
return 1;
}
int
mlx4_netmap_rx_config(struct SOFTC_T *priv, int ring_nr)
{
struct netmap_adapter *na = NA(priv->dev);
struct netmap_slot *slot;
struct mlx4_en_rx_ring *rxr;
struct netmap_kring *kring;
int i, j, possible_frags;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
/*
* on the receive ring, must set buf addresses into the slots.
The ring is activated by mlx4_en_activate_rx_rings(), near the end
the rx ring is also 'started' with mlx4_en_update_rx_prod_db()
so we patch into that routine.
*/
slot = netmap_reset(na, NR_RX, ring_nr, 0);
if (!slot) // XXX should not happen
return 0;
kring = &na->rx_rings[ring_nr];
rxr = &priv->rx_ring[ring_nr];
ND(20, "ring %d slots %d (driver says %d) frags %d stride %d", ring_nr,
kring->nkr_num_slots, rxr->actual_size, priv->num_frags, rxr->stride);
rxr->prod--; // XXX avoid wraparounds ?
if (kring->nkr_num_slots != rxr->actual_size) {
D("mismatch between slots and actual size, %d vs %d",
kring->nkr_num_slots, rxr->actual_size);
return 1; // XXX error
}
possible_frags = (rxr->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
RD(1, "stride %d possible frags %d descsize %d DS_SIZE %d", rxr->stride, possible_frags, (int)sizeof(struct mlx4_en_rx_desc), (int)DS_SIZE );
/* then fill the slots with our entries */
for (i = 0; i < kring->nkr_num_slots; i++) {
uint64_t paddr;
struct mlx4_en_rx_desc *rx_desc = rxr->buf + (i * rxr->stride);
PNMB(slot + i, &paddr);
// see mlx4_en_prepare_rx_desc() and mlx4_en_alloc_frag()
rx_desc->data[0].addr = cpu_to_be64(paddr);
rx_desc->data[0].byte_count = cpu_to_be32(NETMAP_BUF_SIZE);
rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key);
/* we only use one fragment, so the rest is padding */
for (j = 1; j < possible_frags; j++) {
rx_desc->data[j].byte_count = 0;
rx_desc->data[j].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
rx_desc->data[j].addr = 0;
}
}
RD(5, "ring %d done", ring_nr);
return 1;
}
static int
mlx4_netmap_config(struct netmap_adapter *na,
u_int *txr, u_int *txd, u_int *rxr, u_int *rxd)
{
struct net_device *ifp = na->ifp;
struct SOFTC_T *priv = netdev_priv(ifp);
*txr = priv->tx_ring_num;
*txd = priv->tx_ring[0].size;
*rxr = priv->rx_ring_num;
if (*txr > *rxr) {
D("using only %d out of %d tx queues", *rxr, *txr);
*txr = *rxr;
}
*rxd = priv->rx_ring[0].size;
D("txr %d txd %d bufsize %d -- rxr %d rxd %d act %d bufsize %d",
*txr, *txd, priv->tx_ring[0].buf_size,
*rxr, *rxd, priv->rx_ring[0].actual_size,
priv->rx_ring[0].buf_size);
return 0;
}
/*
* The attach routine, called near the end of mlx4_en_init_netdev(),
* fills the parameters for netmap_attach() and calls it.
* It cannot fail, in the worst case (such as no memory)
* netmap mode will be disabled and the driver will only
* operate in standard mode.
*
* XXX TODO:
* at the moment use a single lock, and only init a max of 4 queues.
*/
static void
mlx4_netmap_attach(struct SOFTC_T *priv)
{
struct netmap_adapter na;
struct net_device *dev = priv->dev;
int rxq, txq;
bzero(&na, sizeof(na));
na.ifp = dev;
rxq = priv->rx_ring_num;
txq = priv->tx_ring_num;
/* this card has 1k tx queues, so better limit the number */
if (rxq > 16)
rxq = 16;
if (txq > rxq)
txq = rxq;
if (txq < 1 && rxq < 1)
txq = rxq = 1;
na.num_tx_rings = txq;
na.num_rx_rings = rxq;
na.num_tx_desc = priv->tx_ring[0].size;
na.num_rx_desc = priv->rx_ring[0].size;
na.nm_txsync = mlx4_netmap_txsync;
na.nm_rxsync = mlx4_netmap_rxsync;
na.nm_register = mlx4_netmap_reg;
na.nm_config = mlx4_netmap_config;
netmap_attach(&na);
}
#endif /* NETMAP_MLX4_MAIN */
/* end of file */

File diff suppressed because it is too large

View File

@ -1,68 +0,0 @@
#!/bin/bash
#set -x # for debugging
if [ -z "$NMSRC" ]; then
NMSRC=~/netmap-release
fi
DRIVER="ixgbe"
#IF="eth0" # force an interface
if [ ! -f ${NMSRC}/LINUX/netmap_lin.ko ]; then
echo "LINUX/netmap_lin.ko missing. Please compile netmap."
exit 1
fi
if [ ! -f ${NMSRC}/LINUX/${DRIVER}/${DRIVER}.ko ]; then
echo "LINUX/${DRIVER}/${DRIVER}.ko missing."
echo "Please compile netmap or make sure to have netmap support for ${DRIVER}"
exit 1
fi
NMLOADED=$(lsmod | grep netmap_lin | wc -l)
DRVLOADED=$(lsmod | grep "${DRIVER}" | wc -l)
# Unload the driver
if [ $DRVLOADED != "0" ]; then
sudo rmmod "$DRIVER"
fi
# Load netmap
if [ $NMLOADED == "0" ]; then
sudo insmod ${NMSRC}/LINUX/netmap_lin.ko
fi
if [ "$1" == "g" ]; then
# To use the generic netmap adapter, load the original driver module,
# which doesn't have netmap support
sudo modprobe ${DRIVER}
echo "Generic netmap adapter."
else
# Use the driver modified with netmap support
sudo insmod ${NMSRC}/LINUX/${DRIVER}/${DRIVER}.ko
echo "Native netmap adapter."
fi
# Wait a bit for the interface name to settle
sleep 2
# Find all interfaces
IFLIST=$(ip link | grep -o "^[0-9]\+: [^:]\+" | awk '{print $2}')
IFLIST=$(echo ${IFLIST})
# Find the interface that match the driver $DRIVER
for i in $IFLIST; do
drv=$(sudo ethtool -i $i 2> /dev/null | grep "driver" | awk '{print $2}')
if [ "$drv" == "$DRIVER" ]; then
IF=$i
echo " Found interface \"${IF}\""
fi
done
if [ "$IF" == "" ]; then
echo "No interface using ${DRIVER} driver was found."
exit 1
fi
sudo ip link set ${IF} up

View File

@ -1 +0,0 @@
final-patches

View File

@ -1,3 +0,0 @@
#!/bin/sh
sed -n 's/^## \?//p' $1 | fmt

View File

@ -1,82 +0,0 @@
#!/bin/bash
#set -x
function pgset()
{
local result
echo $1 > ${PGDEV}
result=$(cat $PGDEV | fgrep "Result: OK:")
if [ "$result" = "" ]; then
cat $PGDEV | fgrep "Result:"
fi
}
##################### Script configuration ######################
N="$1" # number of TX kthreads minus one
if [ -z "$1" ]; then
N=0
fi
NCPUS="7" # number of CPUs on your machine minus one
IF="enp1s0f1" # network interface to test
DST_IP="10.216.8.1" # destination IP address
DST_MAC="00:1b:21:80:e7:d9" # destination MAC address
PKT_SIZE="60" # packet size
PKT_COUNT="10000000" # number of packets to send
CLONE_SKB="10000" # how many times a sk_buff is recycled
# Load pktgen kernel module
modprobe pktgen
# Clean the configuration for all the CPU-kthread (from 0 to ${NCPUS})
IDX=$(seq 0 1 ${NCPUS})
for cpu in ${IDX}; do
PGDEV="/proc/net/pktgen/kpktgend_${cpu}"
echo "Removing all devices (${cpu})"
pgset "rem_device_all"
done
IDX=$(seq 0 1 ${N})
for cpu in ${IDX}; do
# kthread-device configuration
PGDEV="/proc/net/pktgen/kpktgend_${cpu}"
echo "Configuring $PGDEV"
echo "Adding ${IF}@${cpu}"
pgset "add_device ${IF}@${cpu}"
# Packets/mode configuration
PGDEV="/proc/net/pktgen/${IF}@${cpu}"
echo "Configuring $PGDEV"
pgset "count ${PKT_COUNT}"
pgset "clone_skb ${CLONE_SKB}"
pgset "pkt_size ${PKT_SIZE}"
pgset "delay 0"
pgset "dst $DST_IP"
pgset "dst_mac $DST_MAC"
pgset "flag QUEUE_MAP_CPU"
echo ""
done
# Run
PGDEV="/proc/net/pktgen/pgctrl"
echo "Running... Ctrl-C to stop"
pgset "start"
echo "Done."
# Show results
NUMS=""
for cpu in ${IDX}; do
TMP=$(cat /proc/net/pktgen/${IF}@${cpu} | grep -o "[0-9]\+pps" | grep -o "[0-9]\+")
echo "$cpu $TMP"
NUMS="${NUMS} ${TMP}"
done
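# sum the per-kthread packet rates by joining them with '+' and piping to bc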
echo "Total TX rate: $(echo $NUMS | tr ' ' '+' | bc)"

View File

@ -1,428 +0,0 @@
#!/bin/bash
## Manage linux driver patches for netmap.
## usage (from the dir containing the Makefile):
##
## scripts/np <action> [args...]
##
## where <action> is any of the functions below.
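##
## For example, "scripts/np get-patch ixgbe 3.10" extracts the netmap
## patch for the ixgbe driver and kernel 3.10 (illustrative values).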
##
[ -f scripts/conf ] && source scripts/conf
## The following environment variables must be set:
##
## GITDIR: the absolute path of the netmap linux
## git repository, containing all the required netmap-*
## branches.
[ -n "$GITDIR" -a -d "$GITDIR/.git" ] || {
echo "GITDIR not set or not valid" >&2
exit 1
}
NETMAP_BRANCH=${NETMAP_BRANCH:-master}
function error {
echo "$@" >&2
exit 1
}
function get-params {
local params=$1; shift
err_msg="$PROGNAME $COMMAND $(echo $params| perl -pe 's/\S+/<$&>/g')"
local param
for param in $params; do
[[ -z "$@" ]] && error "$err_msg"
pname=$(echo -n $param | perl -pe 's/\W/_/g')
eval $pname="$1"
shift
done
[[ -n "$@" ]] && error "$err_msg"
}
##
## LINUX_SOURCES: the absolute path of a
## directory used to store all required linux-* source trees
## (The script will extract linux-x.y.z from GITDIR if it needs
## it and $LINUX_SOURCES does not already contain it).
##
## LINUX_CONFIGS: the absolute path of a
## directory containing the configuration files for
## the linux kernel. The file for version x must be named
## config-x. config-all can be used as a default.
##
## The configuration variables can be put in scripts/conf.
##
##
## Available actions:
##
##
## driver-path <driver> <version>
## retrieves the path of <driver> in the linux sources
## for version <version>. The path is output to stdout.
## It uses a local cache to minimize the expensive
## file system search.
function driver-path()
{
get-params "driver version" "$@"
cat cache/$version/$driver/path 2>/dev/null && return
local kern=$(get-kernel $version)
mkdir -p cache/$version/$driver
(
cd $kern
find drivers/net -name $driver
) | tee cache/$version/$driver/path
}
##
## get-patch [-c] <driver> <version>
## extract the netmap patch for the given <driver> and the
## given kernel <version>. The patch is stored in tmp-patches
## and the name of the patch is output to stdout.
## If a patch with the same name already exists in tmp-patches
## it is overwritten, unless the -c option is used,
## in which case the existing patch is kept (the patch name is still output).
function get-patch()
{
local use_cache
[ "$1" = -c ] && { use_cache=1; shift; }
get-params "driver version" "$@"
# convert kernel version to fixed notation
local v1=$(scripts/vers $version -c)
# compute next kernel version (in fixed notation)
local v2=$(scripts/vers $version -i -c)
local patchname=diff--$driver--$v1--$v2
local out=tmp-patches/$patchname
[ -n "$use_cache" -a -s $out ] && { echo $out; return; }
local drvpath=$(driver-path $driver $version)
[ -n "$drvpath" ] || return
local drvdir=$(dirname $drvpath)
(
cd $GITDIR
git diff --relative=$drvdir v$version..netmap-$version -- $drvpath
) > $out
# an empty patch means no netmap support for this driver
[ -s $out ] || { rm $out; return 1; }
echo $out
return 0;
}
##
## get-range <driver> <version1> <version2>
## extracts the netmap patches for the given <driver> for
## all the kernel versions from <version1> (included) to
## <version2> (excluded). All patches are stored in tmp-patches
## and their names are output to stdout.
function get-range()
{
get-params "driver version1 version2" "$@"
local v=$version1
# while version is less than $version2
while scripts/vers -b $v $version2 -L; do
get-patch $driver $v
# compute next version
v=$(scripts/vers $v -i)
done
}
##
## get-src <driver> <version> <dest>
## copies the original sources of the given <driver>,
## from the given kernel <version> to the given <dest>
## directory.
## It uses a local cache to minimize the expensive
## checkouts in GITDIR.
function get-src()
{
get-params "driver version dest" "$@"
local kern=$(get-kernel $version)
local src=$(driver-path $driver $version)
cp -r $kern/$src $dest
}
##
## extend <patch> <version>
## checks whether the range of applicability of the
## given <patch> can be extended to include <version>.
## It returns 0 on success and 1 on failure.
function extend()
{
get-params "patch version" "$@"
local _patch=$(realpath $patch)
# extract the driver name from the patch name
local driver=$(scripts/vers $_patch -s -p -p)
local tmpdir1=$(mktemp -d)
local tmpdir2=$(mktemp -d)
trap "rm -rf $tmpdir1 $tmpdir2" 0
# we get the driver sources for the given <version> and
# we apply two patches separately:
# i) the given <patch>;
# ii) the proper patch from GITDIR.
# We declare <patch> to be extendable if
# - it is still applicable AND
# - we obtain the same files from i) and ii) (ignoring whitespace)
get-src $driver $version $tmpdir1
get-src $driver $version $tmpdir2
(
cd $tmpdir1
patch --no-backup-if-mismatch -p1 < $_patch >/dev/null 2>&1
) || return 1
local patch2=$(get-patch -c $driver $version)
patch2=$(realpath $patch2)
(
cd $tmpdir2
patch -p1 < $patch2 >/dev/null 2>&1
) # this will certainly apply
diff -qbBr $tmpdir1 $tmpdir2 >/dev/null || return 1
return 0
}
##
## minimize <driver>
## tries to minimize the number of patch files for the given
## <driver>. It uses the patches currently found in tmp-patches
## and stores the resulting patches in final-patches.
## If final-patches already contained patches for <driver>,
## they are deleted first.
function minimize()
{
get-params "driver" "$@"
mkdir -p final-patches
local drv=$(basename $driver)
local patches=$(ls tmp-patches/diff--$drv--* 2>/dev/null)
[ -n "$patches" ] || return 1
# put the patch names in $1, $2, ...
set $patches
rm -f final-patches/diff--$drv--*
# the original patches (in tmp-patches) are ordered by version number.
# We consider one patch in turn (the 'pivot') and try
# to extend its range to cover the range of the next
# patch. If this succeeds, the merged patch is the new
# pivot, otherwise the current pivot is output and the
# next patch becomes the new pivot. The process
# is repeated until there are no more patches to consider.
local pivot=$1
[ -n "$pivot" -a -e "$pivot" ] || return 1
# extract the left end and right end of the pivot's range
local ple=$(scripts/vers $pivot -s -p -C)
local pre=$(scripts/vers $pivot -s -C)
while [ -n "$pivot" ]; do
shift
if [ -n "$1" ]; then
# extract the left end and right end of the next patch
local nle=$(scripts/vers $1 -s -p -C)
local nre=$(scripts/vers $1 -s -C)
# we admit no gaps in the range
if [ $pre = $nle ] && extend $pivot $nle; then
pre=$nre
continue
fi
fi
# either out of patches or failed merge.
# Compute the file name of the current pivot and store
# the patch in its final location
out=$(scripts/vers diff $drv $ple -c $pre -c -S4)
cp $pivot final-patches/$out
# the new pivot becomes the next patch (if any)
pivot=$1
pre=$nre
ple=$nle
done
return 0
}
##
## infty <driver> <version>
## if final-patches contains a patch for <driver> with a range
## ending in <version>, extend it to infinity.
## Do nothing otherwise.
function infty()
{
get-params "driver version" "$@"
local drv=$(basename $driver)
# convert kernel version to fixed notation
local v=$(scripts/vers $version -c)
local last=$(ls final-patches/diff--$drv--*--$v 2>/dev/null|tail -n1)
[ -n "$last" ] || return 1
mv -n $last $(scripts/vers $last -s -p 99999 -S4) 2>/dev/null
}
function get-kernel()
{
get-params "version" "$@"
local dst="$(realpath $LINUX_SOURCES)/linux-$version"
[ -d $dst ] && { echo $dst; return; }
mkdir -p $dst
(
cd $GITDIR
git archive v$version | tar xf - -C $dst
)
echo $dst
}
##
## build-prep <version>
## prepare the linux tree for <version> to be ready
## for external modules compilation.
## The tree is put in $LINUX_SOURCES/linux-<version> and the
## configuration is obtained from $LINUX_CONFIGS/config-<version>
## (or $LINUX_CONFIGS/config-all by default).
## Errors are logged to $LINUX_CONFIGS/linux-<version>.log.
## If $LINUX_SOURCES/linux-<version> already exists,
## nothing is done.
## In all cases, the absolute path of linux-<version> is
## output.
function build-prep()
{
get-params "version" "$@"
local dst=$(get-kernel $version)
exec 3>&1 4>&2 >$dst.log 2>&1
cp $LINUX_CONFIGS/config-$version $dst/.config 2>/dev/null ||
cp $LINUX_CONFIGS/config-all $dst/.config
(
cd $dst
yes '' | make oldconfig
make modules_prepare
)
exec 1>&3 2>&4
echo $dst
}
##
## check-patch <patch>
## check that the given <patch> applies and compiles without
## error for all its declared range of applicability.
## Errors are logged to log/<patch>.
function check-patch()
{
get-params "patch" "$@"
# extract the left version
local v1=$(scripts/vers $patch -s -p -C)
# extract the right version
local v2=$(scripts/vers $patch -s -C)
# extract the driver name
local driver=$(scripts/vers $patch -s -p -p)
local p=$(realpath $patch)
mkdir -p log
local log="$(realpath log)/$(basename $patch)"
local nmcommit=$(cd ..; git show-ref -s heads/$NETMAP_BRANCH)
echo -n $patch...
while scripts/vers -b $v1 $v2 -L; do
# cache lookup
local cache=cache/$v1/$driver
local cpatch=$cache/patch
local cnmcommit=$cache/nmcommit
local cstatus=$cache/status
local clog=$cache/log
if [ -f $cpatch ] &&
cmp -s $cpatch $patch &&
[ "$nmcommit" = "$(cat $cnmcommit)" ]; then
cp $clog $log
ok=$(cat $cstatus)
else
# update cache
cp $patch $cpatch
echo $nmcommit > $cnmcommit
local ksrc=$(build-prep $v1)
local tmpdir=$(mktemp -d)
trap "rm -rf $tmpdir" 0
(cd ..; git archive $NETMAP_BRANCH | tar xf - -C $tmpdir )
pushd $tmpdir/LINUX >/dev/null
mkdir single-patch
rm patches
ln -s single-patch patches
cp $p single-patch
ok=false
make KSRC=$ksrc >$log 2>&1 && ok=true
popd >/dev/null
cp $log $clog
fi
[ $ok = true ] || { echo FAILED; echo false > $cstatus; return 1; }
echo true > $cstatus
rm -rf $tmpdir
# compute next version
v1=$(scripts/vers $v1 -i)
done
echo OK
}
##
## build-check <driver>
## do a check-patch for all the patches of <driver> that are
## currently in tmp-patches. Patches that fail the check
## are moved to failed-patches.
function build-check()
{
get-params "driver" "$@"
mkdir -p failed-patches
local drv=$(basename $driver)
local patches=$(ls tmp-patches/diff--$drv--* 2>/dev/null)
local p
for p in $patches; do
check-patch $p || mv $p failed-patches
done
}
##
## forall <cmd> [args...]
## exec <cmd> <driver> [args...] for all known drivers.
function forall()
{
local cmd=$1
shift
# we obtain the value of DRIVER_SRC from the makefile
# (the +% target is defined in our Makefile and prints
# the contents of variable %)
local driver_srcs=$(make +DRIVER_SRCS)
local driver
for driver in $driver_srcs; do
$cmd $(basename $driver) "$@"
done
}
mkdir -p tmp-patches
PROGNAME=$0
[ -n "$1" ] || {
scripts/help $PROGNAME;
exit 1
}
COMMAND=$1; shift
case $COMMAND in
*-all)
forall ${COMMAND%-all} "$@"
;;
-[hH]|--help|-help|help)
scripts/help $PROGNAME
;;
*)
$COMMAND "$@"
;;
esac

View File

@ -1,162 +0,0 @@
#!/usr/bin/perl
## Simple stack-based RPN calculator for linux version numbers.
## Usage:
##
## scripts/vers [operand|operation ...]
##
## Operations all start with '-', everything else is an operand
## and is pushed on the stack as-is.
## When all arguments have been processed, the content of the
## top of the stack is printed on stdout and the script ends.
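##
## For example, "scripts/vers 2.6.39 -i" prints 3.0 (the next version),
## while "scripts/vers 2.6.39 -c" prints 20627 (fixed notation).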
##
## Available operations:
sub badversion
{
my $v = shift;
die "Bad version $v";
}
sub conv
{
my $v = shift;
return sprintf "%x%02x%02x", (split /\./, $v);
}
sub rconv
{
my $v = shift;
$v =~ /(.*)(..)(..)$/;
if ($1 > 2 && (hex $3) == 0) {
return sprintf "%d.%d", (hex $1), (hex $2);
}
return sprintf "%d.%d.%d", (hex $1), (hex $2), (hex $3);
}
sub next
{
my $v = shift;
my ($may, $min, $sub) = split /\./, $v;
if ($may < 2 || ($may == 2 && $min != 6)) {
&badversion($v);
}
if ($may == 2) {
if ($sub < 39) {
return "2.6." . ($sub + 1);
} elsif ($sub == 39) {
return "3.0";
} else {
&badversion($v);
}
} else {
return "$may." . ($min + 1);
}
}
@ARGV or do { system("scripts/help $0"); exit 1; };
for (@ARGV) {
##
## -b (nullary) suppress normal output. On exit, return 1
## if stack top is "false", 0 otherwise.
/^-b$/ && do {
$silent=1;
next;
};
##
## -c (unary) convert from dot to fixed notation
/^-c$/ && do {
$v = pop @stack;
push @stack, &conv($v);
next;
};
##
## -C (unary) convert from fixed to dot notation
/^-C$/ && do {
$v = pop @stack;
push @stack, &rconv($v);
next;
};
##
## -i (unary) increment version number
## (must be in dot notation)
/^-i$/ && do {
$v = pop @stack;
push @stack, &next($v);
next;
};
##
## -s (unary) assume the stack top is a
## string containing several fields separated
## by '--'. Replace the stack top with these
## fields (last on top)
/^-s$/ && do {
$v = pop @stack;
push @stack, split /--/, $v;
next;
};
##
## -SN (N-ary) pop N elements from the stack,
## join them using '--' as a separator
## (top as last) and push the resulting
## string
/^-S(\d+)$/ && do {
$n = $1;
@t = @stack[-$n..-1];
while ($n--) {
pop @stack;
}
push @stack, (join '--', @t);
next;
};
##
## -p (unary) pop
/^-p$/ && do {
pop @stack;
next;
};
##
## -l (binary) push "true" if first version
## number is strictly less than second version
## number (versions in fixed notation)
##
## -L (binary) like -l, but for version numbers
## in dot notation
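## (e.g. "scripts/vers 3.8.0 3.10.0 -L" prints "true")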
/^-[lL]$/ && do {
$v1 = pop @stack;
$v2 = pop @stack;
/^-L$/ && do {
$v1 = &conv($v1);
$v2 = &conv($v2);
};
push @stack, (($v2 lt $v1) ? "true" : "false");
next;
};
##
## -a (binary) logical and. Arguments must be
## either "true" or "false".
/^-a$/ && do {
$v1 = pop @stack;
$v2 = pop @stack;
push @stack, (($v1 eq "true" && $v2 eq "true") ? "true" : "false");
next;
};
##
## -n (unary) logical not. Argument must be
## either "true" or "false".
/^-n$/ && do {
$v1 = pop @stack;
push @stack, (($v1 eq "true") ? "false" : "true");
next;
};
push @stack, $_;
}
$v = pop @stack;
if ($silent) {
exit ($v eq "false");
}
print "$v\n";


@ -1,538 +0,0 @@
/*
* Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <bsd_glue.h>
#include <net/netmap.h>
#include <netmap/netmap_kern.h>
#define SOFTC_T virtnet_info
static int virtnet_close(struct ifnet *ifp);
static int virtnet_open(struct ifnet *ifp);
static void free_receive_bufs(struct virtnet_info *vi);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
/* Before 2.6.35 there was no net_device.num_rx_queues, so we assume 1. */
#define DEV_NUM_RX_QUEUES(_netdev) 1
/* A scatterlist struct is needed by functions that invoke
   virtqueue_add_buf() methods, but before 2.6.35 these structs were
   not part of the virtio-net data structures: they were defined inside
   those functions. This macro provides that definition, which is not
   necessary for subsequent versions. */
#define COMPAT_DECL_SG struct scatterlist _compat_sg;
#else /* >= 2.6.35 */
#define DEV_NUM_RX_QUEUES(_netdev) (_netdev)->num_rx_queues
#define COMPAT_DECL_SG
#endif /* >= 2.6.35 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
/* Before 2.6.35, the virtio interface was not exported as functions,
   but through virtqueue callbacks. */
#define virtqueue_detach_unused_buf(_vq) \
(_vq)->vq_ops->detach_unused_buf(_vq)
#define virtqueue_get_buf(_vq, _lp) \
(_vq)->vq_ops->get_buf(_vq, _lp)
#define virtqueue_add_inbuf(_vq, _sg, _num, _tok, _gfp) \
(_vq)->vq_ops->add_buf(_vq, _sg, 0, _num, _tok)
#define virtqueue_add_outbuf(_vq, _sg, _num, _tok, _gfp) \
(_vq)->vq_ops->add_buf(_vq, _sg, _num, 0, _tok)
#define virtqueue_kick(_vq) \
(_vq)->vq_ops->kick(_vq)
#define virtqueue_enable_cb(_vq) \
(_vq)->vq_ops->enable_cb(_vq)
#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
/* Some simple renaming due to virtio interface changes. */
#define virtqueue_add_inbuf(_vq, _sg, _num, _tok, _gfp) \
virtqueue_add_buf_gfp(_vq, _sg, 0, _num, _tok, _gfp)
#define virtqueue_add_outbuf(_vq, _sg, _num, _tok, _gfp) \
virtqueue_add_buf_gfp(_vq, _sg, _num, 0, _tok, _gfp)
#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
/* Some simple renaming due to virtio interface changes. */
#define virtqueue_add_inbuf(_vq, _sg, _num, _tok, _gfp) \
virtqueue_add_buf(_vq, _sg, 0, _num, _tok, _gfp)
#define virtqueue_add_outbuf(_vq, _sg, _num, _tok, _gfp) \
virtqueue_add_buf(_vq, _sg, _num, 0, _tok, _gfp)
#endif /* 3.3 <= VER < 3.10.0 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
/* The delayed optimization did not exist before version 3.0. */
#define virtqueue_enable_cb_delayed(_vq) virtqueue_enable_cb(_vq)
#endif /* < 3.0 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 2, 0)
/* We have not yet found a way to query the virtqueue length in
   these kernel series, so use the virtio default value. */
#define virtqueue_get_vring_size(_vq) 256
#endif /* < 3.2 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
/* Before 3.8.0 virtio did not have multiple queues, and therefore
it did not have per-queue data structures. We then abstract the
way data structures are accessed, ignoring the queue indexes. */
#define DECR_NUM(_vi, _i) --(_vi)->num
#define GET_RX_VQ(_vi, _i) (_vi)->rvq
#define GET_TX_VQ(_vi, _i) (_vi)->svq
#define VQ_FULL(_vq, _err) (_err > 0)
static void give_pages(struct SOFTC_T *vi, struct page *page);
static struct page *get_a_page(struct SOFTC_T *vi, gfp_t gfp_mask);
#define GIVE_PAGES(_vi, _i, _buf) give_pages(_vi, _buf)
/* This function did not exist; there was just inline code. */
static void free_receive_bufs(struct SOFTC_T *vi)
{
while (vi->pages)
__free_pages(get_a_page(vi, GFP_KERNEL), 0);
}
#else /* >= 3.8.0 */
static void give_pages(struct receive_queue *rq, struct page *page);
#define GIVE_PAGES(_vi, _i, _buf) give_pages(&(_vi)->rq[_i], _buf)
#define DECR_NUM(_vi, _i) --(_vi)->rq[_i].num
#define GET_RX_VQ(_vi, _i) (_vi)->rq[_i].vq
#define GET_TX_VQ(_vi, _i) (_vi)->sq[_i].vq
#define VQ_FULL(_vq, _err) ((_vq)->num_free == 0)
#endif /* >= 3.8.0 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
/* Use the scatterlist struct defined in the current function
(see above). */
#define GET_RX_SG(_vi, _i) &_compat_sg
#define GET_TX_SG(_vi, _i) &_compat_sg
#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
/* Also here we create an abstraction because of multiqueue support
(see above). */
#define GET_RX_SG(_vi, _i) (_vi)->rx_sg
#define GET_TX_SG(_vi, _i) (_vi)->tx_sg
#else /* >= 3.8.0 */
#define GET_RX_SG(_vi, _i) (_vi)->rq[_i].sg
#define GET_TX_SG(_vi, _i) (_vi)->sq[_i].sg
#endif /* >= 3.8.0 */
/* Free all the unused buffers in all the RX virtqueues.
* This function is called when entering and exiting netmap mode.
* In the former case, the unused buffers point to memory allocated by
* the virtio-driver (e.g. sk_buffs). We need to free that
* memory, otherwise we have leakage.
* In the latter case, the unused buffers point to memory allocated by
* netmap, and so we don't need to free anything.
* We scan all the RX virtqueues, even those that have not been
* activated (by 'ethtool --set-channels eth0 combined $N').
*/
static void virtio_netmap_free_rx_unused_bufs(struct SOFTC_T* vi, int onoff)
{
void *buf;
int i, c;
for (i = 0; i < DEV_NUM_RX_QUEUES(vi->dev); i++) {
struct virtqueue *vq = GET_RX_VQ(vi, i);
c = 0;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
if (onoff) {
if (vi->mergeable_rx_bufs || vi->big_packets)
GIVE_PAGES(vi, i, buf);
else
dev_kfree_skb(buf);
}
DECR_NUM(vi, i);
c++;
}
D("[%d] freed %d rx unused bufs on queue %d", onoff, c, i);
}
}
/* Register and unregister. */
static int
virtio_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *vi = netdev_priv(ifp);
struct netmap_hw_adapter *hwna = (struct netmap_hw_adapter*)na;
int error = 0;
if (na == NULL)
return EINVAL;
/* It's important to deny the registration if the interface is
not up, otherwise the virtnet_close() is not matched by a
virtnet_open(), and so a napi_disable() is not matched by
a napi_enable(), which results in a deadlock. */
if (!netif_running(ifp))
return EBUSY;
rtnl_lock();
/* Down the interface. This also disables napi. */
virtnet_close(ifp);
if (onoff) {
/* We have to drain the RX virtqueues, otherwise the
* virtio_netmap_init_buffer() called by the subsequent
* virtnet_open() cannot link the netmap buffers to the
* virtio RX ring. */
virtio_netmap_free_rx_unused_bufs(vi, onoff);
/* Also free the pages allocated by the driver. */
free_receive_bufs(vi);
/* enable netmap mode */
ifp->if_capenable |= IFCAP_NETMAP;
na->na_flags |= NAF_NATIVE_ON;
na->if_transmit = (void *)ifp->netdev_ops;
ifp->netdev_ops = &hwna->nm_ndo;
} else {
ifp->if_capenable &= ~IFCAP_NETMAP;
na->na_flags &= ~NAF_NATIVE_ON;
ifp->netdev_ops = (void *)na->if_transmit;
/* Drain the RX virtqueues, otherwise the driver will
* interpret the netmap buffers currently linked to the
* netmap ring as buffers allocated by the driver. This
* would break the driver (and kernel panic/ooops). */
virtio_netmap_free_rx_unused_bufs(vi, onoff);
}
/* Up the interface. This also enables the napi. */
virtnet_open(ifp);
rtnl_unlock();
return (error);
}
/* Reconcile kernel and user view of the transmit ring. */
static int
virtio_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* device-specific */
COMPAT_DECL_SG
struct SOFTC_T *vi = netdev_priv(ifp);
struct virtqueue *vq = GET_TX_VQ(vi, ring_nr);
struct scatterlist *sg = GET_TX_SG(vi, ring_nr);
struct netmap_adapter *token;
// XXX invert the order
/* Free used slots. We only consider our own used buffers, recognized
* by the token we passed to virtqueue_add_outbuf.
*/
n = 0;
for (;;) {
token = virtqueue_get_buf(vq, &nic_i); /* dummy 2nd arg */
if (token == NULL)
break;
if (likely(token == na))
n++;
}
kring->nr_hwtail += n;
if (kring->nr_hwtail > lim)
kring->nr_hwtail -= lim + 1;
/*
* First part: process new packets to send.
*/
rmb();
if (!netif_carrier_ok(ifp)) {
/* All the new slots are now unavailable. */
goto out;
}
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
void *addr = NMB(slot);
int err;
NM_CHECK_ADDR_LEN(addr, len);
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Initialize the scatterlist, expose it to the hypervisor,
* and kick the hypervisor (if necessary).
*/
sg_set_buf(sg, addr, len);
err = virtqueue_add_outbuf(vq, sg, 1, na, GFP_ATOMIC);
if (err < 0) {
D("virtqueue_add_outbuf failed");
break;
}
virtqueue_kick(vq);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
/* Update hwcur depending on where we stopped. */
kring->nr_hwcur = nm_i; /* note we might break early */
/* No more free TX slots? Ask the hypervisor for notifications,
* possibly only when a considerable amount of work has been
* done.
*/
if (nm_kr_txempty(kring))
virtqueue_enable_cb_delayed(vq);
}
out:
nm_txsync_finalize(kring);
return 0;
}
/* Reconcile kernel and user view of the receive ring. */
static int
virtio_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
// u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
COMPAT_DECL_SG
struct SOFTC_T *vi = netdev_priv(ifp);
struct virtqueue *vq = GET_RX_VQ(vi, ring_nr);
struct scatterlist *sg = GET_RX_SG(vi, ring_nr);
/* XXX netif_carrier_ok ? */
if (head > lim)
return netmap_ring_reinit(kring);
rmb();
/*
* First part: import newly received packets.
* Only accept our
* own buffers (matching the token). We should only get
* matching buffers, because of virtio_netmap_free_rx_unused_bufs()
* and virtio_netmap_init_buffers().
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
struct netmap_adapter *token;
nm_i = kring->nr_hwtail;
n = 0;
for (;;) {
int len;
token = virtqueue_get_buf(vq, &len);
if (token == NULL)
break;
if (likely(token == na)) {
ring->slot[nm_i].len = len;
ring->slot[nm_i].flags = slot_flags;
nm_i = nm_next(nm_i, lim);
n++;
} else {
D("This should not happen");
}
}
kring->nr_hwtail = nm_i;
kring->nr_kflags &= ~NKR_PENDINTR;
}
ND("[B] h %d c %d hwcur %d hwtail %d",
ring->head, ring->cur, kring->nr_hwcur,
kring->nr_hwtail);
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur; /* netmap ring index */
if (nm_i != head) {
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
void *addr = NMB(slot);
int err;
if (addr == netmap_buffer_base) /* bad buf */
return netmap_ring_reinit(kring);
slot->flags &= ~NS_BUF_CHANGED;
/* Initialize the scatterlist, expose it to the hypervisor,
* and kick the hypervisor (if necessary).
*/
sg_set_buf(sg, addr, ring->nr_buf_size);
err = virtqueue_add_inbuf(vq, sg, 1, na, GFP_ATOMIC);
if (err < 0) {
D("virtqueue_add_inbuf failed");
return err;
}
virtqueue_kick(vq);
nm_i = nm_next(nm_i, lim);
}
kring->nr_hwcur = head;
}
/* We have finished processing used RX buffers, so we have to tell
* the hypervisor to make a call when more used RX buffers will be
* ready.
*/
virtqueue_enable_cb(vq);
/* tell userspace that there might be new packets. */
nm_rxsync_finalize(kring);
ND("[C] h %d c %d t %d hwcur %d hwtail %d",
ring->head, ring->cur, ring->tail,
kring->nr_hwcur, kring->nr_hwtail);
return 0;
}
/* Make RX virtqueues buffers pointing to netmap buffers. */
static int virtio_netmap_init_buffers(struct SOFTC_T *vi)
{
struct ifnet *ifp = vi->dev;
struct netmap_adapter* na = NA(ifp);
unsigned int r;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
for (r = 0; r < na->num_rx_rings; r++) {
COMPAT_DECL_SG
struct netmap_ring *ring = na->rx_rings[r].ring;
struct virtqueue *vq = GET_RX_VQ(vi, r);
struct scatterlist *sg = GET_RX_SG(vi, r);
struct netmap_slot* slot;
unsigned int i;
int err = 0;
slot = netmap_reset(na, NR_RX, r, 0);
if (!slot) {
D("strange, null netmap ring %d", r);
return 0;
}
/* Add up to na->num_rx_desc-1 buffers to this RX virtqueue.
* It's important to leave one virtqueue slot free, otherwise
* we can run into ring->cur/ring->tail wraparounds.
*/
for (i = 0; i < na->num_rx_desc-1; i++) {
void *addr;
slot = &ring->slot[i];
addr = NMB(slot);
sg_set_buf(sg, addr, ring->nr_buf_size);
err = virtqueue_add_inbuf(vq, sg, 1, na, GFP_ATOMIC);
if (err < 0) {
D("virtqueue_add_inbuf failed");
return 0;
}
if (VQ_FULL(vq, err))
break;
}
D("added %d inbufs on queue %d", i, r);
virtqueue_kick(vq);
}
return 1;
}
/* Update the virtio-net device configurations. Number of queues can
 * change dynamically, by 'ethtool --set-channels $IFNAME combined $N'.
* This is actually the only way virtio-net can currently enable
* the multiqueue mode.
*/
static int
virtio_netmap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
u_int *rxr, u_int *rxd)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *vi = netdev_priv(ifp);
*txr = ifp->real_num_tx_queues;
*txd = virtqueue_get_vring_size(GET_TX_VQ(vi, 0));
*rxr = 1;
*rxd = virtqueue_get_vring_size(GET_RX_VQ(vi, 0));
D("virtio config txq=%d, txd=%d rxq=%d, rxd=%d",
*txr, *txd, *rxr, *rxd);
return 0;
}
static void
virtio_netmap_attach(struct SOFTC_T *vi)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = vi->dev;
na.num_tx_desc = virtqueue_get_vring_size(GET_TX_VQ(vi, 0));
na.num_rx_desc = virtqueue_get_vring_size(GET_RX_VQ(vi, 0));
na.nm_register = virtio_netmap_reg;
na.nm_txsync = virtio_netmap_txsync;
na.nm_rxsync = virtio_netmap_rxsync;
na.nm_config = virtio_netmap_config;
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
D("virtio attached txq=%d, txd=%d rxq=%d, rxd=%d",
na.num_tx_rings, na.num_tx_desc,
na.num_tx_rings, na.num_rx_desc);
}
/* end of file */


@ -1,145 +0,0 @@
diff -urp --exclude '*.o' --exclude '*.cmd' --exclude '*mod.c' drivers/net/ethernet/mellanox/mlx4/en_netdev.c ./mellanox/mlx4/en_netdev.c
--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c 2012-09-11 20:50:55.982624673 -0700
+++ ./mellanox/mlx4/en_netdev.c 2012-09-27 00:05:22.703523430 -0700
@@ -48,6 +48,39 @@
#include "mlx4_en.h"
#include "en_port.h"
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * This driver is split in multiple small files.
+ * The main device descriptor has type struct mlx4_en_priv *priv;
+ * and we attach to the device in mlx4_en_init_netdev()
+ * (do port numbers start from 1 ?)
+ *
+ * The reconfig routine is in mlx4_en_start_port() (also here)
+ * which is called on a mlx4_en_restart() (watchdog), open and set-mtu.
+ *
+ * priv->num_frags ??
+ * DS_SIZE ??
+ * apparently each rx desc is followed by frag.descriptors
+ * and the rx desc is rounded up to a power of 2.
+ *
+ * Receive code is in en_rx.c
+ * priv->rx_ring_num number of rx rings
+ * rxr = prov->rx_ring[ring_ind] rx ring descriptor
+ * rxr->size number of slots
+ * rxr->prod producer
+ * probably written into a mmio reg at *rxr->wqres.db.db
+ * trimmed to 16 bits.
+ *
+ * Rx init routine:
+ * mlx4_en_activate_rx_rings()
+ * mlx4_en_init_rx_desc()
+ * Transmit code is in en_tx.c
+ */
+
+#define NETMAP_MLX4_MAIN
+#include <mlx4_netmap_linux.h> /* extern stuff */
+#endif /* CONFIG_NETMAP */
+
int mlx4_en_setup_tc(struct net_device *dev, u8 up)
{
if (up != MLX4_EN_NUM_UP)
@@ -1042,6 +1075,9 @@ int mlx4_en_start_port(struct net_device
/* Set initial ownership of all Tx TXBBs to SW (1) */
for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
*((u32 *) (tx_ring->buf + j)) = 0xffffffff;
+#ifdef DEV_NETMAP
+ mlx4_netmap_tx_config(priv, i);
+#endif /* DEV_NETMAP */
++tx_index;
}
@@ -1639,6 +1675,9 @@ int mlx4_en_init_netdev(struct mlx4_en_d
en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
+#ifdef DEV_NETMAP
+ mlx4_netmap_attach(priv);
+#endif /* DEV_NETMAP */
return 0;
out:
--- drivers/net/ethernet/mellanox/mlx4/en_rx.c 2012-09-11 20:50:55.982624673 -0700
+++ ./mellanox/mlx4/en_rx.c 2012-09-27 00:13:16.099550954 -0700
@@ -41,6 +41,9 @@
#include "mlx4_en.h"
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <mlx4_netmap_linux.h>
+#endif /* !DEV_NETMAP */
static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
struct mlx4_en_rx_desc *rx_desc,
@@ -365,9 +368,16 @@ int mlx4_en_activate_rx_rings(struct mlx
ring = &priv->rx_ring[ring_ind];
ring->size_mask = ring->actual_size - 1;
+#ifdef DEV_NETMAP
+ if (priv->dev->if_capenable & IFCAP_NETMAP) {
+ int saved_cons = ring->cons;
+ mlx4_en_free_rx_buf(priv, ring);
+ ring->cons = saved_cons;
+ mlx4_netmap_rx_config(priv, ring_ind);
+ }
+#endif /* DEV_NETMAP */
mlx4_en_update_rx_prod_db(ring);
}
-
return 0;
err_buffers:
@@ -402,6 +412,11 @@ void mlx4_en_destroy_rx_ring(struct mlx4
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring)
{
+#ifdef DEV_NETMAP
+ if (priv->dev->if_capenable & IFCAP_NETMAP)
+ ND("netmap mode, rx buf already freed");
+ else
+#endif /* DEV_NETMAP */
mlx4_en_free_rx_buf(priv, ring);
if (ring->stride <= TXBB_SIZE)
ring->buf -= TXBB_SIZE;
@@ -718,6 +739,11 @@ int mlx4_en_poll_rx_cq(struct napi_struc
struct mlx4_en_priv *priv = netdev_priv(dev);
int done;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(cq->dev, cq->ring, &done)) {
+ ND("rx_irq %d for netmap, budget %d done %d", cq->ring, budget, done);
+ } else
+#endif /* DEV_NETMAP */
done = mlx4_en_process_rx_cq(dev, cq, budget);
/* If we used up all the quota - we're probably not done yet... */
--- drivers/net/ethernet/mellanox/mlx4/en_tx.c 2012-09-11 20:50:55.982624673 -0700
+++ ./mellanox/mlx4/en_tx.c 2012-09-27 00:05:22.713523348 -0700
@@ -55,6 +55,10 @@ MODULE_PARM_DESC(inline_thold, "threshol
static u32 hashrnd __read_mostly;
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <mlx4_netmap_linux.h> /* extern stuff */
+#endif /* CONFIG_NETMAP */
+
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring, u32 size,
u16 stride)
@@ -396,6 +400,13 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq)
if (!spin_trylock(&ring->comp_lock))
return;
+#ifdef DEV_NETMAP
+ /* XXX should be integrated with appropriate lock_wrapper manner? */
+ if (netmap_tx_irq(cq->dev, cq->ring)) {
+ ND(5, "wakeup queue %d", cq->ring);
+ spin_unlock(&ring->comp_lock);
+ return;
+ }
+#endif /* DEV_NETMAP */
mlx4_en_process_tx_cq(cq->dev, cq);
mod_timer(&cq->timer, jiffies + 1);
spin_unlock(&ring->comp_lock);


@ -1,163 +0,0 @@
diff -urp --exclude '*.o' --exclude '*.cmd' --exclude '*mod.c' drivers/net/ethernet/mellanox/mlx4/en_netdev.c ./mellanox/mlx4/en_netdev.c
--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c 2012-09-11 20:50:55.982624673 -0700
+++ ./mlx4/en_netdev.c 2012-09-27 00:05:22.703523430 -0700
@@ -48,6 +48,39 @@
#include "mlx4_en.h"
#include "en_port.h"
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+/*
+ * This driver is split in multiple small files.
+ * The main device descriptor has type struct mlx4_en_priv *priv;
+ * and we attach to the device in mlx4_en_init_netdev()
+ * (do port numbers start from 1 ?)
+ *
+ * The reconfig routine is in mlx4_en_start_port() (also here)
+ * which is called on a mlx4_en_restart() (watchdog), open and set-mtu.
+ *
+ * priv->num_frags ??
+ * DS_SIZE ??
+ * apparently each rx desc is followed by frag.descriptors
+ * and the rx desc is rounded up to a power of 2.
+ *
+ * Receive code is in en_rx.c
+ * priv->rx_ring_num number of rx rings
+ * rxr = prov->rx_ring[ring_ind] rx ring descriptor
+ * rxr->size number of slots
+ * rxr->prod producer
+ * probably written into a mmio reg at *rxr->wqres.db.db
+ * trimmed to 16 bits.
+ *
+ * Rx init routine:
+ * mlx4_en_activate_rx_rings()
+ * mlx4_en_init_rx_desc()
+ * Transmit code is in en_tx.c
+ */
+
+#define NETMAP_MLX4_MAIN
+#include <mlx4_netmap_linux.h> /* extern stuff */
+#endif /* CONFIG_NETMAP */
+
int mlx4_en_setup_tc(struct net_device *dev, u8 up)
{
if (up != MLX4_EN_NUM_UP)
@@ -1042,6 +1075,9 @@ int mlx4_en_start_port(struct net_device
/* Set initial ownership of all Tx TXBBs to SW (1) */
for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
*((u32 *) (tx_ring->buf + j)) = 0xffffffff;
+#ifdef DEV_NETMAP
+ mlx4_netmap_tx_config(priv, i);
+#endif /* DEV_NETMAP */
++tx_index;
}
@@ -1639,6 +1675,9 @@ int mlx4_en_init_netdev(struct mlx4_en_d
en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
+#ifdef DEV_NETMAP
+ mlx4_netmap_attach(priv);
+#endif /* DEV_NETMAP */
return 0;
out:
--- drivers/net/ethernet/mellanox/mlx4/en_rx.c 2012-09-11 20:50:55.982624673 -0700
+++ ./mlx4/en_rx.c 2012-09-27 00:13:16.099550954 -0700
@@ -41,6 +41,9 @@
#include "mlx4_en.h"
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <mlx4_netmap_linux.h>
+#endif /* !DEV_NETMAP */
static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
struct mlx4_en_rx_desc *rx_desc,
@@ -365,9 +368,16 @@ int mlx4_en_activate_rx_rings(struct mlx
ring = &priv->rx_ring[ring_ind];
ring->size_mask = ring->actual_size - 1;
+#ifdef DEV_NETMAP
+ if (priv->dev->if_capenable & IFCAP_NETMAP) {
+ int saved_cons = ring->cons;
+ mlx4_en_free_rx_buf(priv, ring);
+ ring->cons = saved_cons;
+ mlx4_netmap_rx_config(priv, ring_ind);
+ }
+#endif /* DEV_NETMAP */
mlx4_en_update_rx_prod_db(ring);
}
-
return 0;
err_buffers:
@@ -402,6 +412,11 @@ void mlx4_en_destroy_rx_ring(struct mlx4
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring)
{
+#ifdef DEV_NETMAP
+ if (priv->dev->if_capenable & IFCAP_NETMAP)
+ ND("netmap mode, rx buf already freed");
+ else
+#endif /* DEV_NETMAP */
mlx4_en_free_rx_buf(priv, ring);
if (ring->stride <= TXBB_SIZE)
ring->buf -= TXBB_SIZE;
@@ -692,6 +707,12 @@ out:
wmb(); /* ensure HW sees CQ consumer before we post new buffers */
ring->cons = mcq->cons_index;
ring->prod += polled; /* Polled descriptors were realocated in place */
+
+ ND(5, "set_ci %d 0x%p val %d prod_db 0x%p val %d",
+ cq->ring,
+ mcq->set_ci_db, mcq->cons_index & 0xffffff,
+ ring->wqres.db.db, ring->prod & 0xffff);
+
mlx4_en_update_rx_prod_db(ring);
ring->csum_ok += csum_ok;
ring->csum_none += csum_none;
@@ -718,6 +739,13 @@ int mlx4_en_poll_rx_cq(struct napi_struc
struct mlx4_en_priv *priv = netdev_priv(dev);
int done;
+#ifdef DEV_NETMAP
+ static int cnt = 0;
+ ND(5,"XXXXXX-------XXXXXXXXXXX-------- poll-rx-cq %d count %d", (int)cq->ring, cnt++);
+ if (netmap_rx_irq(cq->dev, cq->ring, &done)) {
+ ND("rx_irq %d for netmap, budget %d done %d", cq->ring, budget, done);
+ } else
+#endif /* DEV_NETMAP */
done = mlx4_en_process_rx_cq(dev, cq, budget);
/* If we used up all the quota - we're probably not done yet... */
--- drivers/net/ethernet/mellanox/mlx4/en_tx.c 2012-09-11 20:50:55.982624673 -0700
+++ ./mlx4/en_tx.c 2012-09-27 00:05:22.713523348 -0700
@@ -55,6 +55,10 @@ MODULE_PARM_DESC(inline_thold, "threshol
static u32 hashrnd __read_mostly;
+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <mlx4_netmap_linux.h> /* extern stuff */
+#endif /* CONFIG_NETMAP */
+
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring, u32 size,
u16 stride)
@@ -396,6 +400,17 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq)
if (!spin_trylock(&ring->comp_lock))
return;
+#ifdef DEV_NETMAP // XXX unlock and return should be in the 'if' branch
+ static int cnt = 0;
+ ND(5,"XXXXXX-------XXXXXXXXXXX-------- tx-irq %d count %d", (int)cq->ring, cnt++);
+ if (netmap_tx_irq(cq->dev, cq->ring)) {
+ ND(5, "wakeup queue %d", cq->ring);
+ } else {
+ RD(5, "XXXXXXXXX tx_irq %d unexpected, ignoring", cq->ring);
+ }
+ spin_unlock(&ring->comp_lock);
+ return;
+#endif /* DEV_NETMAP */
mlx4_en_process_tx_cq(cq->dev, cq);
mod_timer(&cq->timer, jiffies + 1);
spin_unlock(&ring->comp_lock);


@ -1,30 +0,0 @@
# $Id$
# targets to build tarballs and diffs
# build a distribution
RELEASE_SRCS := ./sys/net ./sys/dev ./sys/modules ./examples
RELEASE_SRCS += ./README* ./LINUX ./OSX
RELEASE_EXCL := --exclude .svn --exclude examples/testmod
RELEASE_EXCL += --exclude connlib\*
RELEASE_EXCL += --exclude if_epair.diff
#RELEASE_EXCL += --exclude \*-patches
RELEASE_EXCL += --exclude \*bnx2x\* --exclude \*mellanox\* --exclude \*mlx4\*
RELEASE_EXCL += --exclude OSX
all:
@echo "What do you want to do ?"
diff-head:
(cd ~/FreeBSD/head ; \
svn diff sys/conf sys/dev sbin/ifconfig ) > head-netmap.diff
# XXX remember to patch sbin/ifconfig if not done yet
diff-r8:
(cd ~/FreeBSD/RELENG_8 ; \
svn diff sys/conf sys/dev sbin/ifconfig ) > r8-netmap.diff
release:
D=`date +%Y%m%d` && tar cvzf /tmp/$${D}-netmap.tgz \
-s'/^./netmap-release/' $(RELEASE_EXCL) $(RELEASE_SRCS)


@ -1,131 +0,0 @@
# $Id$
Adding netmap support to network device drivers
------------------------------------------------
Netmap requires some small modifications to device drivers
to support the new API. You will need to add small patches
in 3-4 places in the original source, and implement typically
5 new functions.
Device driver patches
------------------------
+ in the initial part of the source, after the device-specific
headers and prototypes have been declared, add the following
<pre>
+#if defined(DEV_NETMAP) || defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE)
+#include <dev/netmap/if_re_netmap.h>
+#endif /* !DEV_NETMAP */
</pre>
The place is typically ... in FreeBSD, and
... on Linux.
The header actually contains the new functions that implement
the netmap API. Including them inline simplifies the build,
as it does not require inserting additional dependencies into
the build system.
On FreeBSD DEV_NETMAP is sufficient to detect whether netmap extensions
should be compiled in, whereas CONFIG_NETMAP and CONFIG_NETMAP_MODULE
are the Linux equivalent.
If a driver is made of multiple source files, you will need to include
the additional header in all the (few) patched files, preferably using
a macro such as NETMAP_FOO_MAIN to indicate the file where the
new functions should be compiled in.
+ near the end of the attach routine, once the ifnet/net_device structure
has been filled and initialized, add
<pre>
+#ifdef DEV_NETMAP
+ foo_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
</pre>
The argument is either the ifnet or the private device descriptor.
This is in foo_attach() on FreeBSD, and somewhere in the path of
XXX foo_open() in Linux
+ near the code called on device removal, add
<pre>
+#ifdef DEV_NETMAP
+ netmap_detach(ifp);
+#endif /* DEV_NETMAP */
</pre>
+ after the tx/rx rings have been initialized, add a patch like this:
<pre>
+#ifdef DEV_NETMAP
+ foo_netmap_config(priv);
+#endif /* DEV_NETMAP */
</pre>
The argument is typically the private device descriptor, or even
the struct ifnet/net_device.
+ in the interrupt dispatch routines, something like
<pre>
+#ifdef DEV_NETMAP
+ int dummy;
+ if (netmap_rx_irq(adapter->netdev, rx_ring->queue_index, &dummy))
+ return true;
+#endif /* DEV_NETMAP */
...
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(adapter->netdev, tx_ring->queue_index))
+ return true; /* seems to be ignored */
+#endif /* DEV_NETMAP */
</pre>
to skip the normal processing and instead wake up the process in
charge of doing I/O
New functions
----------------
The new functions serve to register the netmap-enabled device driver,
support the enable/disable of netmap mode, attach netmap buffers to the
NIC rings, and finally implement the handlers (*_txsync(), *_rxsync())
called by the system calls.
* foo_netmap_attach()
This is a relatively mechanical function. The purpose is to fetch from
the device descriptor information on the number of rings and buffers,
the way locks are used, and invoke netmap_attach().
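A minimal sketch, modeled on the virtio example elsewhere in this
distribution; the foo_* names and softc fields are illustrative,
not a fixed API:
<pre>
static void
foo_netmap_attach(struct SOFTC_T *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = adapter->netdev;	/* the ifnet on FreeBSD */
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_register = foo_netmap_reg;
	na.nm_txsync = foo_netmap_txsync;
	na.nm_rxsync = foo_netmap_rxsync;
	na.num_tx_rings = na.num_rx_rings = 1;
	netmap_attach(&na);
}
</pre>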
* foo_netmap_config()
This function is in charge of (over)writing the NIC rings with
pointers to the netmap buffers. Although this is device dependent,
we can often ignore the locking issue and expect that the locking is
already taken care of by the caller.
foo_netmap_config() only needs to run if the card is in netmap mode.
A quick way to check is to call netmap_ring_init() on one of the rings;
if the function returns NULL we can immediately exit.
Otherwise, we should run a couple of nested loops (on the rings,
and then on the buffers) to fill the NIC descriptors with the
addresses of the (preallocated) netmap buffers.
For the TX rings this can even be a no-op because these rings are
typically uninitialized, and the pointers can be overridden in the
txsync() routine.
For the receive ring, the operation is more critical because the
buffers should be available by the time the NIC is enabled.
Note that the device driver typically maintains head and tail pointers
to indicate which buffers are used. It might be convenient to retain
these indexes because many of the support routines, watchdogs etc.
depend on their values.
We should note that, especially on the receive ring, there might be
an offset between the indexes used in the netmap ring and those used
in the NIC ring (which might even be non-contiguous).
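A minimal sketch of those nested loops, using netmap_reset() as the
mode check the way the virtio code in this distribution does (the
actual descriptor write is device dependent and left as a comment):
<pre>
static int
foo_netmap_config(struct SOFTC_T *adapter)
{
	struct netmap_adapter *na = NA(adapter->netdev);
	unsigned int r, i;

	for (r = 0; r < na->num_rx_rings; r++) {
		struct netmap_slot *slot = netmap_reset(na, NR_RX, r, 0);

		if (slot == NULL)	/* not in netmap mode */
			return 0;
		for (i = 0; i < na->num_rx_desc; i++) {
			void *addr = NMB(&slot[i]);
			/* write 'addr' into the i-th NIC rx descriptor */
		}
	}
	return 1;
}
</pre>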
* foo_netmap_reg()
support entering/exiting of netmap mode. Typically, lock, stop the device,
set/clear the netmap flag, and restart the device.
An unfortunate side effect of stopping and restarting the device is that
in many drivers the link is reinitialized, causing long delays for the
speed negotiations and spanning tree setup.
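A minimal sketch, assuming hypothetical device-specific foo_stop()
and foo_init() routines; see the virtio example elsewhere in this
distribution for the extra steps (buffer draining, netdev_ops swap)
that a real driver may need:
<pre>
static int
foo_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;

	foo_stop(ifp);			/* stop the device */
	if (onoff) {			/* enter netmap mode */
		ifp->if_capenable |= IFCAP_NETMAP;
		na->na_flags |= NAF_NATIVE_ON;
	} else {			/* leave netmap mode */
		ifp->if_capenable &= ~IFCAP_NETMAP;
		na->na_flags &= ~NAF_NATIVE_ON;
	}
	foo_init(ifp);			/* restart the device */
	return 0;
}
</pre>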
* foo_netmap_txsync()
* foo_netmap_rxsync()


@ -1,241 +0,0 @@
Netmap - a framework for fast packet I/O
VALE - a Virtual Local Ethernet using the netmap API
========================================================================
NETMAP is a framework for very fast packet I/O from userspace.
VALE is an equally fast in-kernel software switch using the netmap API.
Both are implemented as a single kernel module for FreeBSD and Linux,
and can deal with line rate on real or emulated 10 Gbit ports.
See details at
http://info.iet.unipi.it/~luigi/netmap/
In this directory you can find source code (BSD-Copyright) for FreeBSD
and Linux. Note that recent FreeBSD distributions already include both
NETMAP and VALE.
For more details please look at the manpage (netmap.4) and
netmap home page above.
What is this good for
---------------------
Netmap is mostly useful for userspace applications that must deal with raw
packets: traffic generators, sinks, monitors, loggers, software switches
and routers, generic middleboxes, interconnection of virtual machines.
In this distribution you will find some example userspace code to build
a generator, a sink, and a simple bridge. The kernel module implements a
learning ethernet bridge. We also include patches for some applications
(noticeably libpcap) so you can run any libpcap client on top of netmap
hopefully at a higher speed.
Netmap alone DOES NOT accelerate your TCP. For that you need to implement
your own tcp/ip stack probably using some of the techniques indicated
below to reduce the processing costs.
Architecture
------------
netmap uses a number of techniques to establish a fast and efficient path
between applications and the network. In order of importance:
1. I/O batching
2. efficient device drivers
3. pre-allocated tx/rx buffers
4. memory mapped buffers
Despite the name, memory mapping is NOT the key feature for netmap's
speed; systems that do not apply all these techniques do not achieve
the same speed _and_ efficiency.
Netmap clients use a select()-able file descriptor to synchronize
with the network card/software switch, and exchange multiple packets
per system call through device-independent memory mapped buffers and
descriptors. Device drivers are completely in the kernel, and the system
does not rely on IOMMU or other special mechanisms.
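As a concrete illustration, a minimal receive loop can be written with
the nm_* helpers from net/netmap_user.h (compiled with NETMAP_WITH_LIBS,
as the bridge example in this distribution does); the interface name is
illustrative and error handling is omitted:
    #define NETMAP_WITH_LIBS
    #include <net/netmap_user.h>
    #include <sys/poll.h>

    static void
    rx_loop(const char *ifname)         /* e.g. "netmap:eth0" */
    {
        struct nm_desc *d = nm_open(ifname, NULL, 0, NULL);
        struct pollfd pfd = { .fd = NETMAP_FD(d), .events = POLLIN };
        struct nm_pkthdr h;
        u_char *buf;

        for (;;) {
            poll(&pfd, 1, -1);          /* wait for traffic */
            while ((buf = nm_nextpkt(d, &h)) != NULL) {
                /* process h.len bytes at buf */
            }
        }
    }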
Installation instructions
-------------------------
A kernel module (netmap.ko or netmap_lin.ko) implements the core
NETMAP routines and the VALE switch.
Netmap-aware device drivers are needed to use netmap on ethernet ports.
To date, we have support for Intel ixgbe (10G), e1000/e1000e/igb (1G),
Realtek 8169 (1G) and Nvidia (1G).
If you do not have a supported device, you can still try out netmap
(with reduced performance) because the main kernel module emulates
the netmap API on top of standard device drivers.
FreeBSD instructions:
---------------------
Since recent FreeBSD distributions already include netmap, you only
need to build the new kernel or modules as below:
+ add 'device netmap' to your kernel config file and rebuild a kernel.
This will include the netmap module and netmap support in the device
drivers. Alternatively, you can build standalone modules
(netmap, ixgbe, em, lem, re, igb)
+ sample applications are in the examples/ directory in this archive,
or in src/tools/tools/netmap/ in FreeBSD distributions
Linux instructions:
-------------------
On Linux, netmap is an out-of-tree module, so you need to compile it
from these sources. The Makefile in the LINUX/ directory will also
let you patch device driver sources and build some netmap-enabled
device drivers.
+ make sure you have kernel sources matching your installed kernel
(headers only suffice, if you want NETMAP/VALE but no drivers)
+ build kernel modules and sample applications:
If kernel sources are in /foo/linux-A.B.C/ , then you should do
cd netmap/LINUX
# build kernel modules
make NODRIVERS=1 KSRC=/foo/linux-A.B.C/ # only netmap
make KSRC=/foo/linux-A.B.C/ # netmap+device drivers
# build sample applications
make KSRC=/foo/linux-A.B.C/ apps # builds sample applications
You can omit KSRC if your kernel sources are in a standard place.
Applications
------------
The directory examples/ contains some programs that use the netmap API
pkt-gen.c a packet generator/receiver working at line rate at 10Gbit/s
vale-cfg.c utility to configure ports of a VALE switch
bridge.c a utility that bridges two interfaces or one interface
with the host stack
For libpcap and other applications look at the extra/ directory.
Testing
-------
pkt-gen is a generic test program which can act as a sender or receiver.
It has a large number of options, but the simplest form is:
pkt-gen -i ix0 -f rx # receive and print stats
pkt-gen -i ix0 -f tx -l 60 # send a stream of 60-byte packets
(replace ix0 with the name of the interface or VALE port).
This should be able to work at line rate (up to 14.88 Mpps on 10
Gbit/s interfaces, even higher on VALE) but note the following
OPERATING SPEED
---------------
Netmap is able to send packets at very high rates, and for simple
packet transmission and reception, speed is generally not limited by
the CPU but by other factors (link speed, bus or NIC hw limitations).
For a physical link, the maximum number of packets per second can
be computed with the formula:
pps = line_rate / (672 + 8 * pkt_size)
where "line_rate" is the nominal link rate (e.g 10 Gbit/s) and
pkt_size is the actual packet size including MAC headers and CRC.
The following table summarizes some results
                 LINE RATE (Mpps)
pkt_size \     100M       1G      10G      40G
    64        .1488    1.488    14.88    59.52
   128        .0589    0.589     5.89    23.58
   256        .0367    0.367     3.67    14.70
   512        .0209    0.209     2.09     8.38
  1024        .0113    0.113     1.13     4.51
  1518        .0078    0.078     0.78     3.12
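As a worked example, at 10 Gbit/s with 1518-byte frames the formula
gives pps = 10e9 / (672 + 8*1518) = 10e9 / 12816, about 0.78 Mpps,
matching the last row of the table.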
On VALE ports, there is no physical link and the throughput is
limited by CPU or memory depending on the packet size.
COMMON PROBLEMS
---------------
Before reporting slow send or receive speed on a physical interface,
check ALL of the following:
CANNOT SET THE DEVICE IN NETMAP MODE:
+ make sure that the netmap module and drivers are correctly
loaded and can allocate all the memory they need (check into
/var/log/messages or equivalent)
+ check permissions on /dev/netmap
+ make sure the interface is up before invoking pkt-gen
SENDER DOES NOT TRANSMIT
+ some switches/interfaces take a long time to (re)negotiate
the link after starting pkt-gen; in case, use the -w N option
to increase the initial delay to N seconds;
This may cause inability to transmit, or lost packets for
the first few seconds of transmission
RECEIVER DOES NOT RECEIVE
+ make sure traffic uses a broadcast MAC addresses, or the UNICAST
address of the receiving interface, or the receiving interface is in
promiscuous mode (this must be done with ifconfig; pkt-gen does not
change the operating mode)
LOWER SPEED THAN LINE RATE
+ check that your CPUs are running at the maximum clock rate
and are not throttled down by the governor/powerd.
+ make sure that the sender/receiver interfaces and switch have
flow control (FC) disabled (either via sysctl or ethtool).
If FC is enabled and the receiving end is unable to cope
with the traffic, the driver will try to slow down transmission,
sometimes to very low rates.
+ a lot of hardware is not able to sustain line rate. For instance,
ixgbe has problems with receiving frames that are not multiple
of 64 bytes (with/without CRC depending on the driver); also on
transmissions, ixgbe tops at about 12.5 Mpps unless the driver
prefetches tx descriptors. igb does line rate in all configurations.
e1000/e1000e vary between 1.15 and 1.32 Mpps. re/r8169 is
extremely slow in sending (max 4-500 Kpps)
Credits
-------
NETMAP and VALE are projects of the Universita` di Pisa,
partially supported by various entities including:
Intel Research Berkeley, EU FP7 projects CHANGE and OPENLAB,
Netapp/Silicon Valley Community Foundation, ICSI
Author: Luigi Rizzo
Contributors:
Giuseppe Lettieri
Michio Honda
Marta Carbone
Gaetano Catalli
Matteo Landi
Vincenzo Maffione
References
----------
There are a few academic papers describing netmap, VALE and applications.
You can find the papers at http://info.iet.unipi.it/~luigi/research.html
+ Luigi Rizzo,
netmap: a novel framework for fast packet I/O,
Usenix ATC'12, Boston, June 2012
+ Luigi Rizzo,
Revisiting network I/O APIs: the netmap framework,
Communications of the ACM 55 (3), 45-51, March 2012
+ Luigi Rizzo, Marta Carbone, Gaetano Catalli,
Transparent acceleration of software packet forwarding using netmap,
IEEE Infocom 2012, Orlando, March 2012
+ Luigi Rizzo, Giuseppe Lettieri,
VALE: a switched ethernet for virtual machines,
ACM Conext 2012, Nice, Dec. 2012
+ Luigi Rizzo, Giuseppe Lettieri, Vincenzo Maffione,
Speeding up packet I/O in virtual machines,
IEEE/ACM ANCS 2013, San Jose, Oct. 2013


@ -1,416 +0,0 @@
EXPERIMENTING WITH NETMAP, VALE AND FAST QEMU
---------------------------------------------
To ease experiments with Netmap, the VALE switch and our Qemu enhancements
we have prepared a couple of bootable images (linux and FreeBSD).
You can find them on the netmap page
http://info.iet.unipi.it/~luigi/netmap/
where you can also look at more recent versions of this file.
Below are step-by-step instructions on experiments you can run
with these images. The two main versions are
picobsd.hdd -> FreeBSD HEAD (netmap + VALE)
tinycore.hdd -> Linux (qemu + netmap + VALE)
Booting the image
-----------------
For all experiments you need to copy the image on a USB stick
and boot a PC with it. Alternatively, you can use the image
with VirtualBox, Qemu or other emulators, as an example
qemu-system-x86_64 -hda IMAGE_FILE -m 1G -machine accel=kvm ...
(remove 'accel=kvm' if your host does not support kvm).
The images do not install anything on the hard disk.
Both systems have preloaded drivers for a number of network cards
(including the intel 10 Gbit ones) with netmap extensions.
The VALE switch is also available (it is part of the netmap module).
ssh, scp and a few other utilities are also included.
FreeBSD image:
+ the OS boots directly in console mode, you can switch
between terminals with ALT-Fn.
The password for the 'root' account is 'setup'
+ if you are connected to a network, you can use
dhclient em0 # or other interface name
to obtain an IP address and external connectivity.
Linux image:
+ in addition to the netmap/VALE modules, the KVM kernel module
is also preloaded.
+ the boot-loader gives you two main options (each with
a variant to delay boot in case you have slow devices):
+ "Boot TinyCore"
boots in an X11 environment as user 'tc'.
You can create a few terminals using the icon at the
bottom. You can use "sudo -s" to get root access.
In case no suitable video card is available/detected,
it falls back to command line mode.
+ "Boot Core (command line only)"
boots in console mode with virtual terminals.
You're automatically logged in as user 'tc'.
To log in the other terminals use the same username
(no password required).
+ The system should automatically recognize the existing ethernet
devices, and load the appropriate netmap-capable device drivers
when available. Interfaces are configured through DHCP when possible.
General test recommendations
----------------------------
NOTE: The tests outlined in the following sections can generate very high
packet rates, and some hardware misconfiguration problems may prevent
you from achieving maximum speed.
Common problems are:
+ slow link autonegotiation.
Our programs typically wait 2-4 seconds for
link negotiation to complete, but some NIC/switch combinations
are much slower. In this case you should increase the delay
(pkt-gen has the -w XX option for that) or possibly force
the link speed and duplex mode on both sides.
Check the link speed to make sure there are no negotiation
problems, and that you see the expected speed.
ethtool IFNAME # on linux
ifconfig IFNAME # on FreeBSD
+ ethernet flow control.
If the receiving port is slow (often the case in presence
of multicast/broadcast traffic, or also unicast if you are
sending to non-netmap receivers), it will generate ethernet
flow control frames that throttle down the sender.
We recommend to disable BOTH RX and TX ethernet flow control
on BOTH sender and receiver.
On Linux this can be done with ethtool:
ethtool -A IFNAME tx off rx off
whereas on FreeBSD there are device-specific sysctl
sysctl dev.ix.0.queue0.flow_control = 0
+ CPU power saving.
The CPU governor on linux, or equivalent in FreeBSD, tend to
throttle down the clock rate reducing performance.
Unlike other similar systems, netmap does not have busy-wait
loops, so the CPU load is generally low and this can trigger
the clock slowdown.
Make sure that ALL CPUs run at maximum speed, possibly
disabling the dynamic frequency-scaling mechanisms.
cpufreq-set -gperformance # on linux
sysctl dev.cpu.0.freq=3401 # on FreeBSD.
+ wrong MAC address
netmap does not put the NIC in promiscuous mode, so unless the
application does it, the NIC will only receive broadcast traffic or
unicast directed to its own MAC address.
STANDARD SOCKET TESTS
---------------------
For most socket-based experiments you can use the "netperf" tool installed
on the system (version 2.6.0). Be careful to use a matching version for
the other netperf endpoint (e.g. netserver) when running tests between
different machines.
Interesting experiments are:
netperf -H x.y.z.w -tTCP_STREAM # test TCP throughput
netperf -H x.y.z.w -tTCP_RR # test latency
netperf -H x.y.z.w -tUDP_STREAM -- -m8 # test UDP throughput with short packets
where x.y.z.w is the host running "netserver".
RAW SOCKET AND TAP TESTS
------------------------
For experiments with raw sockets and tap devices you can use the l2
utilities (l2open, l2send, l2recv) installed on the system.
With these utilities you can send/receive custom network packets
to/from raw sockets or tap file descriptors.
The receiver can be run with one of the following commands
l2open -r IFNAME l2recv # receive from a raw socket attached to IFNAME
l2open -t IFNAME l2recv # receive from a file descriptor opened on the tap IFNAME
The receiver process will wait indefinitely for the first packet
and then keep receiving as long as packets keep coming. When the
flow stops (after a 2 seconds timeout) the process terminates and
prints the received packet rate and packet count.
To run the sender in an easy way, you can use the script l2-send.sh
in the home directory. This script defines several shell variables
that can be manually changed to customize the test (see
the comments in the script itself).
As an example, you can test configurations with Virtual
Machines attached to host tap devices bridged together.
Tests using the Linux in-kernel pktgen
--------------------------------------
To use the Linux in-kernel packet generator, you can use the
script "linux-pktgen.sh" in the home directory.
The pktgen creates a kernel thread for each hardware TX queue
of a given NIC.
By manually changing the script shell variable definitions you
can change the test configuration (e.g. addresses in the generated
packet). Please change the "NCPU" variable to match the number
of CPUs on your machine. The script has an argument which
specifies the number of NIC queues (i.e. kernel threads)
to use minus one.
For example:
./linux-pktgen.sh 2 # Uses 3 NIC queues
When the script terminates, it prints the per-queue rates and
the total rate achieved.
NETMAP AND VALE EXPERIMENTS
---------------------------
For most experiments with netmap you can use the "pkt-gen" command
(do not confuse it with the Linux in-kernel pktgen), which has a large
number of options to send and receive traffic (also on TAP devices).
pkt-gen normally generates UDP traffic for a specific IP address
and uses the broadcast MAC address.
Netmap testing with network interfaces
--------------------------------------
Remember that you need a netmap-capable driver in order to use
netmap on a specific NIC. Currently supported drivers are e1000,
e1000e, ixgbe, igb. For updated information please visit
http://info.iet.unipi.it/~luigi/netmap/
Before running pkt-gen, make sure that the link is up.
Run pkt-gen on an interface called "IFNAME":
pkt-gen -i IFNAME -f tx # run a pkt-gen sender
pkt-gen -i IFNAME -f rx # run a pkt-gen receiver
pkt-gen without arguments will show other options, e.g.
+ -w sec modifies the wait time for link negotiation
+ -l len modifies the packet size
+ -d, -s set the IP destination/source addresses and ports
+ -D, -S set the MAC destination/source addresses
and more.
Testing the VALE switch
------------------------
To use the VALE switch instead of physical ports you only need
to change the interface name in the pkt-gen command.
As an example, on a single machine, you can run senders and receivers
on multiple ports of a VALE switch as follows (run the commands into
separate terminals to see the output)
pkt-gen -ivale0:01 -ftx # run a sender on the port 01 of the switch vale0
pkt-gen -ivale0:02 -frx # receiver on the port 02 of same switch
pkt-gen -ivale0:03 -ftx # another sender on the port 03
The VALE switches and ports are created (and destroyed) on the fly.
Transparent connection of physical ports to the VALE switch
-----------------------------------------------------------
It is also possible to use a network device as a port of a VALE
switch. You can do this with the following command:
vale-ctl -h vale0:eth0 # attach interface "eth0" to the "vale0" switch
To detach an interface from a bridge:
vale-ctl -d vale0:eth0 # detach interface "eth0" from the "vale0" switch
These operations can be issued at any moment.
Tests with our modified QEMU
----------------------------
The Linux image also contains our modified QEMU, with the VALE backend and
the "e1000-paravirt" frontend (a paravirtualized e1000 emulation).
After you have booted the image on a physical machine (so you can exploit
KVM), you can boot the same image a second time (recursively) with QEMU.
Therefore, you can run all the tests above also from within the virtual
machine environment.
To make VM testing easier, the home directory contains some
useful scripts to set up and launch VMs on the physical machine.
+ "prep-taps.sh"
creates and sets up two permanent tap interfaces ("tap01" and "tap02")
and a Linux in-kernel bridge. The tap interfaces are then bridged
together on the same bridge. The bridge interface ("br0"), is given
the address 10.0.0.200/24.
This setup can be used to make two VMs communicate through the
host bridge, or to test the speed of a linux switch using
l2open
+ "unprep-taps.sh"
undoes the above setup.
+ "launch-qemu.sh"
can be used to run QEMU virtual machines. It takes four arguments:
+ The first argument can be "qemu" or "kvm", depending on
whether we want to use the standard QEMU binary translation
or the hardware virtualization acceleration.
+ The third argument can be "--tap", "--netuser" or "--vale",
and tells QEMU what network backend to use: a tap device,
the QEMU user networking (slirp), or a VALE switch port.
+ When the third argument is "--tap" or "--vale", the fourth
argument specifies an index (e.g. "01", "02", etc..) which
tells QEMU what tap device or VALE port to use as backend.
You can manually modify the script to set the shell variables that
select the type of emulated device (e.g. e1000, virtio-net-pci, ...)
and related options (ioeventfd, virtio vhost, e1000 mitigation, ....).
The default setup has an "e1000" device with interrupt mitigation
disabled.
You can try the paravirtualized e1000 device ("e1000-paravirt")
or the "virtio-net" device to get better performance. However, bear
in mind that these paravirtualized devices don't have netmap support
(whereas the standard e1000 does have netmap support).
Examples:
# Run a kvm VM attached to the port 01 of a VALE switch
./launch-qemu.sh kvm --vale 01
# Run a kvm VM attached to the port 02 of the same VALE switch
./launch-qemu.sh kvm --vale 02
# Run a kvm VM attached to the tap called "tap01"
./launch-qemu.sh kvm --tap 01
# Run a kvm VM attached to the tap called "tap02"
./launch-qemu.sh kvm --tap 02
Guest-to-guest tests
--------------------
If you run two VMs attached to the same switch (which can be a Linux
bridge or a VALE switch), you can run guest-to-guest experiments.
All the tests reported in the previous sections are possible (normal
sockets, raw sockets, pkt-gen, ...), independently of the backend used.
In the following examples we assume that:
+ Each VM has an ethernet interface called "eth0".
+ The interface of the first VM is given the IP 10.0.0.1/24.
+ The interface of the second VM is given the IP 10.0.0.2/24.
+ The Linux bridge interface "br0" on the host is given the
IP 10.0.0.200/24.
Examples:
[1] ### Test UDP short packets over traditional sockets ###
# On the guest 10.0.0.2 run
netserver
# on the guest 10.0.0.1 run
netperf -H10.0.0.2 -tUDP_STREAM -- -m8
[2] ### Test UDP short packets with pkt-gen ###
# On the guest 10.0.0.2 run
pkt-gen -ieth0 -frx
# On the guest 10.0.0.1 run
pkt-gen -ieth0 -ftx
[3] ### Test guest-to-guest latency ###
# On the guest 10.0.0.2 run
netserver
# On the guest 10.0.0.1 run
netperf -H10.0.0.2 -tTCP_RR
Note that you can use pkt-gen into a VM only if the emulated ethernet
device is supported by netmap. The default emulated device is
"e1000", which has netmap support. If you try to run pkt-gen on
an unsupported device, pkt-gen will not work, reporting that it is
unable to register the interface.
Guest-to-host tests (follows from the previous section)
-------------------------------------------------------
If you run only a VM on your host machine, you can measure the
network performance between the VM and the host machine. In this
case the experiment setup depends on the backend you are using.
With the tap backend, you can use the bridge interface "br0" as a
communication endpoint. You can run normal/raw sockets experiments,
but you cannot use pkt-gen on the "br0" interface, since the Linux
bridge interface is not supported by netmap.
Examples with the tap backend:
[1] ### Test TCP throughput over traditional sockets ###
# On the host run
netserver
# on the guest 10.0.0.1 run
netperf -H10.0.0.200 -tTCP_STREAM
[2] ### Test UDP short packets with pkt-gen and l2 ###
# On the host run
l2open -r br0 l2recv
# On the guest 10.0.0.1 run (xx:yy:zz:ww:uu:vv is the
# "br0" hardware address)
pkt-gen -ieth0 -ftx -d10.0.0.200:7777 -Dxx:yy:zz:ww:uu:vv
With the VALE backend you can perform only UDP tests, since we don't have
a netmap application which implements a TCP endpoint: pkt-gen generates
UDP packets.
As a communication endpoint on the host, you can use a virtual VALE port
opened on the fly by a pkt-gen instance.
Examples with the VALE backend:
[1] ### Test UDP short packets ###
# On the host run
pkt-gen -ivale0:99 -frx
# On the guest 10.0.0.1 run
pkt-gen -ieth0 -ftx
[2] ### Test UDP big packets (receiver on the guest) ###
# On the guest 10.0.0.1 run
pkt-gen -ieth0 -frx
# On the host run
pkt-gen -ivale0:99 -ftx -l1460


@ -1,43 +0,0 @@
# For multiple programs using a single source file each,
# we can just define 'progs' and create custom targets.
PROGS = pkt-gen bridge vale-ctl
#PROGS += pingd
PROGS += testlock test_select testmmap
LIBNETMAP =
CLEANFILES = $(PROGS) *.o
NO_MAN=
CFLAGS = -O2 -pipe
CFLAGS += -Werror -Wall -Wunused-function
CFLAGS += -I ../sys # -I/home/luigi/FreeBSD/head/sys -I../sys
CFLAGS += -Wextra
ifdef WITH_PCAP
# do not use pcap by default, as it is not always available on linux
LDLIBS += -lpcap
else
CFLAGS += -DNO_PCAP
endif
LDLIBS += -lpthread
ifeq ($(shell uname),Linux)
LDLIBS += -lrt # on linux
endif
#SRCS = pkt-gen.c
all: $(PROGS)
kern_test: testmod/kern_test.c
pkt-gen: pkt-gen.o
bridge: bridge.o
vale-ctl: vale-ctl.o
%-pic.o: %.c
$(CC) $(CFLAGS) -fpic -c $^ -o $@
clean:
-@rm -rf $(CLEANFILES)
testlock: testlock.c


@ -1,41 +0,0 @@
# For multiple programs using a single source file each,
# we can just define 'progs' and create custom targets.
PROGS = pkt-gen bridge vale-ctl
#PROGS += pingd
PROGS += testlock test_select testmmap
MORE_PROGS = kern_test
CLEANFILES = $(PROGS) *.o
NO_MAN=
CFLAGS = -O2 -pipe
CFLAGS += -Werror -Wall -Wunused-function
CFLAGS += -I ../sys # -I/home/luigi/FreeBSD/head/sys -I../sys
CFLAGS += -Wextra
.ifdef WITH_PCAP
LDFLAGS += -lpcap
.else
CFLAGS += -DNO_PCAP
.endif
LDFLAGS += -lpthread
LDFLAGS += -lrt # needed on linux, does not harm on BSD
#SRCS = pkt-gen.c
all: $(PROGS)
kern_test: testmod/kern_test.c
pkt-gen: pkt-gen.o
$(CC) $(CFLAGS) -o pkt-gen pkt-gen.o $(LDFLAGS)
bridge: bridge.o
$(CC) $(CFLAGS) -o bridge bridge.o
vale-ctl: vale-ctl.o
$(CC) $(CFLAGS) -o vale-ctl vale-ctl.o
clean:
-@rm -rf $(CLEANFILES)
testlock: testlock.c
$(CC) $(CFLAGS) -o testlock testlock.c -lpthread $(LDFLAGS)


@ -1,11 +0,0 @@
$FreeBSD: head/tools/tools/netmap/README 227614 2011-11-17 12:17:39Z luigi $
This directory contains examples that use netmap
pkt-gen a packet sink/source using the netmap API
bridge a two-port jumper wire, also using the native API
testpcap a jumper wire using libnetmap (or libpcap)
click* various click examples


@ -1,317 +0,0 @@
/*
* (C) 2011-2014 Luigi Rizzo, Matteo Landi
*
* BSD license
*
* A netmap client to bridge two network interfaces
* (or one interface and the host stack).
*
* $FreeBSD: head/tools/tools/netmap/bridge.c 228975 2011-12-30 00:04:11Z uqs $
*/
#include <stdio.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>
#include <sys/poll.h>
int verbose = 0;
static int do_abort = 0;
static int zerocopy = 1; /* enable zerocopy if possible */
static void
sigint_h(int sig)
{
(void)sig; /* UNUSED */
do_abort = 1;
signal(SIGINT, SIG_DFL);
}
/*
* how many packets on this set of queues ?
*/
int
pkt_queued(struct nm_desc *d, int tx)
{
u_int i, tot = 0;
if (tx) {
for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) {
tot += nm_ring_space(NETMAP_TXRING(d->nifp, i));
}
} else {
for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) {
tot += nm_ring_space(NETMAP_RXRING(d->nifp, i));
}
}
return tot;
}
/*
* move up to 'limit' pkts from rxring to txring swapping buffers.
*/
static int
process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
u_int limit, const char *msg)
{
u_int j, k, m = 0;
/* print a warning if any of the ring flags is set (e.g. NM_REINIT) */
if (rxring->flags || txring->flags)
D("%s rxflags %x txflags %x",
msg, rxring->flags, txring->flags);
j = rxring->cur; /* RX */
k = txring->cur; /* TX */
m = nm_ring_space(rxring);
if (m < limit)
limit = m;
m = nm_ring_space(txring);
if (m < limit)
limit = m;
m = limit;
while (limit-- > 0) {
struct netmap_slot *rs = &rxring->slot[j];
struct netmap_slot *ts = &txring->slot[k];
/* swap packets */
if (ts->buf_idx < 2 || rs->buf_idx < 2) {
D("wrong index rx[%d] = %d -> tx[%d] = %d",
j, rs->buf_idx, k, ts->buf_idx);
sleep(2);
}
/* copy the packet length. */
if (rs->len > 2048) {
D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k);
rs->len = 0;
} else if (verbose > 1) {
D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k);
}
ts->len = rs->len;
if (zerocopy) {
uint32_t pkt = ts->buf_idx;
ts->buf_idx = rs->buf_idx;
rs->buf_idx = pkt;
/* report the buffer change. */
ts->flags |= NS_BUF_CHANGED;
rs->flags |= NS_BUF_CHANGED;
} else {
char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx);
char *txbuf = NETMAP_BUF(txring, ts->buf_idx);
nm_pkt_copy(rxbuf, txbuf, ts->len);
}
j = nm_ring_next(rxring, j);
k = nm_ring_next(txring, k);
}
rxring->head = rxring->cur = j;
txring->head = txring->cur = k;
if (verbose && m > 0)
D("%s sent %d packets to %p", msg, m, txring);
return (m);
}
/* move packets from source to destination */
static int
move(struct nm_desc *src, struct nm_desc *dst, u_int limit)
{
struct netmap_ring *txring, *rxring;
u_int m = 0, si = src->first_rx_ring, di = dst->first_tx_ring;
const char *msg = (src->req.nr_ringid & NETMAP_SW_RING) ?
"host->net" : "net->host";
while (si <= src->last_rx_ring && di <= dst->last_tx_ring) {
rxring = NETMAP_RXRING(src->nifp, si);
txring = NETMAP_TXRING(dst->nifp, di);
ND("txring %p rxring %p", txring, rxring);
if (nm_ring_empty(rxring)) {
si++;
continue;
}
if (nm_ring_empty(txring)) {
di++;
continue;
}
m += process_rings(rxring, txring, limit, msg);
}
return (m);
}
static void
usage(void)
{
fprintf(stderr,
"usage: bridge [-v] [-i ifa] [-i ifb] [-b burst] [-w wait_time] [iface]\n");
exit(1);
}
/*
* bridge [-v] if1 [if2]
*
* If only one name, or the two interfaces are the same,
* bridges userland and the adapter. Otherwise bridge
two interfaces.
*/
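/*
 * Example invocations (interface names here are illustrative only):
 *	bridge -i netmap:em1 -i netmap:em2	forward between two NICs
 *	bridge -i netmap:em1			NIC <-> host stack
 */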
int
main(int argc, char **argv)
{
struct pollfd pollfd[2];
int ch;
u_int burst = 1024, wait_link = 4;
struct nm_desc *pa = NULL, *pb = NULL;
char *ifa = NULL, *ifb = NULL;
char ifabuf[64] = { 0 };
fprintf(stderr, "%s built %s %s\n",
argv[0], __DATE__, __TIME__);
while ( (ch = getopt(argc, argv, "b:ci:vw:")) != -1) {
switch (ch) {
default:
D("bad option %c %s", ch, optarg);
usage();
break;
case 'b': /* burst */
burst = atoi(optarg);
break;
case 'i': /* interface */
if (ifa == NULL)
ifa = optarg;
else if (ifb == NULL)
ifb = optarg;
else
D("%s ignored, already have 2 interfaces",
optarg);
break;
case 'c':
zerocopy = 0; /* do not zerocopy */
break;
case 'v':
verbose++;
break;
case 'w':
wait_link = atoi(optarg);
break;
}
}
argc -= optind;
argv += optind;
/* positional arguments, if any, start at argv[0] after the shift */
if (argc > 0)
ifa = argv[0];
if (argc > 1)
ifb = argv[1];
if (argc > 2)
burst = atoi(argv[2]);
if (!ifb)
ifb = ifa;
if (!ifa) {
D("missing interface");
usage();
}
if (burst < 1 || burst > 8192) {
D("invalid burst %d, set to 1024", burst);
burst = 1024;
}
if (wait_link > 100) {
D("invalid wait_link %d, set to 4", wait_link);
wait_link = 4;
}
if (!strcmp(ifa, ifb)) {
D("same interface, endpoint 0 goes to host");
snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa);
ifa = ifabuf;
} else {
/* two different interfaces. Take all rings on if1 */
}
pa = nm_open(ifa, NULL, 0, NULL);
if (pa == NULL) {
D("cannot open %s", ifa);
return (1);
}
// XXX use a single mmap ?
pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa);
if (pb == NULL) {
D("cannot open %s", ifb);
nm_close(pa);
return (1);
}
zerocopy = zerocopy && (pa->mem == pb->mem);
D("------- zerocopy %ssupported", zerocopy ? "" : "NOT ");
/* setup poll(2) variables. */
memset(pollfd, 0, sizeof(pollfd));
pollfd[0].fd = pa->fd;
pollfd[1].fd = pb->fd;
D("Wait %d secs for link to come up...", wait_link);
sleep(wait_link);
D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.",
pa->req.nr_name, pa->first_rx_ring, pa->req.nr_rx_rings,
pb->req.nr_name, pb->first_rx_ring, pb->req.nr_rx_rings);
/* main loop */
signal(SIGINT, sigint_h);
while (!do_abort) {
int n0, n1, ret;
pollfd[0].events = pollfd[1].events = 0;
pollfd[0].revents = pollfd[1].revents = 0;
n0 = pkt_queued(pa, 0);
n1 = pkt_queued(pb, 0);
if (n0)
pollfd[1].events |= POLLOUT;
else
pollfd[0].events |= POLLIN;
if (n1)
pollfd[0].events |= POLLOUT;
else
pollfd[1].events |= POLLIN;
ret = poll(pollfd, 2, 2500);
if (ret <= 0 || verbose)
D("poll %s [0] ev %x %x rx %d@%d tx %d,"
" [1] ev %x %x rx %d@%d tx %d",
ret <= 0 ? "timeout" : "ok",
pollfd[0].events,
pollfd[0].revents,
pkt_queued(pa, 0),
NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->cur,
pkt_queued(pa, 1),
pollfd[1].events,
pollfd[1].revents,
pkt_queued(pb, 0),
NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->cur,
pkt_queued(pb, 1)
);
if (ret < 0)
continue;
if (pollfd[0].revents & POLLERR) {
struct netmap_ring *rx = NETMAP_RXRING(pa->nifp, pa->cur_rx_ring);
D("error on fd0, rx [%d,%d,%d)",
rx->head, rx->cur, rx->tail);
}
if (pollfd[1].revents & POLLERR) {
struct netmap_ring *rx = NETMAP_RXRING(pb->nifp, pb->cur_rx_ring);
D("error on fd1, rx [%d,%d,%d)",
rx->head, rx->cur, rx->tail);
}
if (pollfd[0].revents & POLLOUT) {
move(pb, pa, burst);
// XXX we don't need the ioctl
// ioctl(me[0].fd, NIOCTXSYNC, NULL);
}
if (pollfd[1].revents & POLLOUT) {
move(pa, pb, burst);
// XXX we don't need the ioctl
// ioctl(me[1].fd, NIOCTXSYNC, NULL);
}
}
D("exiting");
nm_close(pb);
nm_close(pa);
return (0);
}


@ -1,19 +0,0 @@
//
// $FreeBSD: head/tools/tools/netmap/click-test.cfg 227614 2011-11-17 12:17:39Z luigi $
//
// A sample test configuration for click
//
//
// create a switch
sw :: EtherSwitch;
// two input devices
c0 :: FromDevice(ix0, PROMISC true);
c1 :: FromDevice(ix1, PROMISC true);
// and now pass packets around
c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0);
c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1);

File diff suppressed because it is too large


@ -1,74 +0,0 @@
/*
* test minimum select time
*
* ./prog usec [method [duration]]
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/time.h>
#include <poll.h>
#include <inttypes.h>
enum { M_SELECT =0 , M_POLL, M_USLEEP };
static const char *names[] = { "select", "poll", "usleep" };
int
main(int argc, char *argv[])
{
struct timeval ta, tb, prev;
int usec = 1, total = 0, method = M_SELECT;
uint32_t *vals = NULL;
uint32_t i, count = 0;
#define LIM 1000000
if (argc > 1)
usec = atoi(argv[1]);
if (usec <= 0)
usec = 1;
else if (usec > 500000)
usec = 500000;
if (argc > 2) {
if (!strcmp(argv[2], "poll"))
method = M_POLL;
else if (!strcmp(argv[2], "usleep"))
method = M_USLEEP;
}
if (argc > 3)
total = atoi(argv[3]);
if (total < 1)
total = 1;
else if (total > 10)
total = 10;
fprintf(stderr, "testing %s for %dus over %ds\n",
names[method], usec, total);
gettimeofday(&ta, NULL);
prev = ta;
vals = calloc(LIM, sizeof(uint32_t));
for (;;) {
if (method == M_SELECT) {
struct timeval to = { 0, usec };
select(0, NULL, NULL, NULL, &to);
} else if (method == M_POLL) {
poll(NULL, 0, usec/1000);
} else {
usleep(usec);
}
gettimeofday(&tb, NULL);
timersub(&tb, &prev, &prev);
if (count < LIM)
vals[count] = prev.tv_usec;
count++;
prev = tb;
timersub(&tb, &ta, &tb);
if (tb.tv_sec > total)
break;
}
fprintf(stderr, "%dus actually took %dus\n",
usec, (int)(tb.tv_sec * 1000000 + tb.tv_usec) / count );
for (i = 0; i < count && i < LIM; i++)
fprintf(stdout, "%d\n", vals[i]);
return 0;
}


@ -1,924 +0,0 @@
/*
* Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Id$
*
* Test program to study various ops and concurrency issues.
* Create multiple threads, possibly bind to cpus, and run a workload.
*
* cc -O2 -Werror -Wall testlock.c -o testlock -lpthread
* you might need -lrt
*/
#include <inttypes.h>
#include <sys/types.h>
#include <pthread.h> /* pthread_* */
#if defined(__APPLE__)
#include <net/if_var.h>
#include <libkern/OSAtomic.h>
#define atomic_add_int(p, n) OSAtomicAdd32(n, (int *)p)
#define atomic_cmpset_32(p, o, n) OSAtomicCompareAndSwap32(o, n, (int *)p)
#elif defined(linux)
int atomic_cmpset_32(volatile uint32_t *p, uint32_t old, uint32_t new)
{
int ret = (*p == old);
if (ret) /* note: plain emulation, not actually atomic */
*p = new;
return ret;
}
#if defined(HAVE_GCC_ATOMICS)
int atomic_add_int(volatile int *p, int v)
{
return __sync_fetch_and_add(p, v);
}
#else
inline
uint32_t atomic_add_int(uint32_t *p, int v)
{
__asm __volatile (
" lock xaddl %0, %1 ; "
: "+r" (v), /* 0 (result) */
"=m" (*p) /* 1 */
: "m" (*p)); /* 2 */
return (v);
}
#endif
#else /* FreeBSD */
#include <sys/param.h>
#include <machine/atomic.h>
#include <pthread_np.h> /* pthread w/ affinity */
#if __FreeBSD_version > 500000
#include <sys/cpuset.h> /* cpu_set */
#if __FreeBSD_version > 800000
#define HAVE_AFFINITY
#endif
#else /* FreeBSD 4.x */
int atomic_cmpset_32(volatile uint32_t *p, uint32_t old, uint32_t new)
{
int ret = (*p == old);
if (ret) /* note: plain emulation, not actually atomic */
*p = new;
return ret;
}
#define PRIu64 "llu"
#endif /* FreeBSD 4.x */
#endif /* FreeBSD */
#include <signal.h> /* signal */
#include <stdlib.h>
#include <stdio.h>
#include <poll.h>
#include <inttypes.h> /* PRI* macros */
#include <string.h> /* strcmp */
#include <fcntl.h> /* open */
#include <unistd.h> /* getopt */
#include <sys/sysctl.h> /* sysctl */
#include <sys/time.h> /* timersub */
#define ONE_MILLION 1000000
/* debug support */
#define ND(format, ...)
#define D(format, ...) \
fprintf(stderr, "%s [%d] " format "\n", \
__FUNCTION__, __LINE__, ##__VA_ARGS__)
int verbose = 0;
#if 1 /* was: #ifdef MY_RDTSC */
/* Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline */
#define my_rdtsc(t) \
do { \
u_int __regs[4]; \
\
do_cpuid(0, __regs); \
(t) = rdtsc(); \
} while (0)
static __inline void
do_cpuid(u_int ax, u_int *p)
{
__asm __volatile("cpuid"
: "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
: "0" (ax) );
}
static __inline uint64_t
rdtsc(void)
{
uint32_t lo, hi;
/* "=A" does not work on 64 bit; read edx:eax explicitly (rdtscp clobbers ecx) */
__asm __volatile("rdtscp" : "=a" (lo), "=d" (hi) : : "%ecx");
return (((uint64_t)hi << 32) | lo);
}
#endif /* 1 */
struct targ;
/*** global arguments for all threads ***/
struct glob_arg {
struct {
uint32_t ctr[1024];
} v __attribute__ ((aligned(256) ));
int64_t m_cycles; /* total cycles */
int nthreads;
int cpus;
int privs; // 1 if has IO privileges
int arg; // microseconds in usleep
int nullfd; // open(/dev/null)
char *test_name;
void (*fn)(struct targ *);
uint64_t scale; // scaling factor
char *scale_name; // scaling factor
};
/*
* Arguments for a new thread.
*/
struct targ {
struct glob_arg *g;
int completed;
u_int *glob_ctr;
uint64_t volatile count;
struct timeval tic, toc;
int me;
pthread_t thread;
int affinity;
};
static struct targ *ta;
static int global_nthreads;
/* control-C handler */
static void
sigint_h(int sig)
{
int i;
(void)sig; /* UNUSED */
for (i = 0; i < global_nthreads; i++) {
/* cancel active threads. */
if (ta[i].completed)
continue;
D("Cancelling thread #%d\n", i);
pthread_cancel(ta[i].thread);
ta[i].completed = 0;
}
signal(SIGINT, SIG_DFL);
}
/* sysctl wrapper to return the number of active CPUs */
static int
system_ncpus(void)
{
#ifdef linux
return sysconf(_SC_NPROCESSORS_ONLN); /* number of online cpus */
#else
int mib[2] = { CTL_HW, HW_NCPU}, ncpus;
size_t len = sizeof(mib);
sysctl(mib, len / sizeof(mib[0]), &ncpus, &len, NULL, 0);
D("system had %d cpus", ncpus);
return (ncpus);
#endif
}
/*
* try to get I/O privileges so we can execute cli/sti etc.
*/
int
getprivs(void)
{
int fd = open("/dev/io", O_RDWR);
if (fd < 0) {
D("cannot open /dev/io, fd %d", fd);
return 0;
}
return 1;
}
/* set the thread affinity. */
/* ARGSUSED */
#ifdef HAVE_AFFINITY
static int
setaffinity(pthread_t me, int i)
{
cpuset_t cpumask;
if (i == -1)
return 0;
/* Set thread affinity. */
CPU_ZERO(&cpumask);
CPU_SET(i, &cpumask);
if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
D("Unable to set affinity");
return 1;
}
return 0;
}
#endif
static void *
td_body(void *data)
{
struct targ *t = (struct targ *) data;
#ifdef HAVE_AFFINITY
if (0 == setaffinity(t->thread, t->affinity))
#endif
{
/* main loop.*/
D("testing %"PRIu64" cycles arg %d",
t->g->m_cycles, t->g->arg);
gettimeofday(&t->tic, NULL);
t->g->fn(t);
gettimeofday(&t->toc, NULL);
}
t->completed = 1;
return (NULL);
}
/*
* select and poll:
* arg fd timeout
* >0 block >0
* 0 block 0
* block NULL (not implemented)
* < -2 ready -arg
* -1 ready 0
* -2 ready NULL / <0 for poll
*
* arg = -1 -> NULL timeout (select)
*/
void
test_sel(struct targ *t)
{
int arg = t->g->arg;
// stdin is blocking on reads; /dev/null or /dev/zero are not
int fd = (arg < 0) ? t->g->nullfd : 0;
fd_set r;
struct timeval t0 = { 0, arg};
struct timeval tcur, *tp = (arg == -2) ? NULL : &tcur;
int64_t m;
if (arg == -1)
t0.tv_usec = 0;
else if (arg < -2)
t0.tv_usec = -arg;
D("tp %p mode %s timeout %d", tp, arg < 0 ? "ready" : "block",
(int)t0.tv_usec);
for (m = 0; m < t->g->m_cycles; m++) {
int ret;
tcur = t0;
FD_ZERO(&r);
FD_SET(fd, &r);
ret = select(fd+1, &r, NULL, NULL, tp);
(void)ret;
ND("ret %d r %d w %d", ret,
FD_ISSET(fd, &r),
FD_ISSET(fd, &w));
t->count++;
}
}
void
test_poll(struct targ *t)
{
int arg = t->g->arg;
// stdin is blocking on reads /dev/null is not
int fd = (arg < 0) ? t->g->nullfd : 0;
int64_t m;
int ms;
if (arg == -1)
ms = 0;
else if (arg == -2)
ms = -1; /* blocking */
else if (arg < 0)
ms = -arg/1000;
else
ms = arg/1000;
D("mode %s timeout %d", arg < 0 ? "ready" : "block", ms);
for (m = 0; m < t->g->m_cycles; m++) {
struct pollfd x;
x.fd = fd;
x.events = POLLIN;
poll(&x, 1, ms);
t->count++;
}
}
void
test_usleep(struct targ *t)
{
int64_t m;
for (m = 0; m < t->g->m_cycles; m++) {
usleep(t->g->arg);
t->count++;
}
}
void
test_cli(struct targ *t)
{
int64_t m, i;
if (!t->g->privs) {
D("%s", "privileged instructions not available");
return;
}
for (m = 0; m < t->g->m_cycles; m++) {
for (i = 0; i < ONE_MILLION; i++) {
__asm __volatile("cli;");
__asm __volatile("and %eax, %eax;");
__asm __volatile("sti;");
t->count++;
}
}
}
void
test_nop(struct targ *t)
{
int64_t m, i;
for (m = 0; m < t->g->m_cycles; m++) {
for (i = 0; i < ONE_MILLION; i++) {
__asm __volatile("nop;");
__asm __volatile("nop; nop; nop; nop; nop;");
//__asm __volatile("nop; nop; nop; nop; nop;");
t->count++;
}
}
}
void
test_rdtsc1(struct targ *t)
{
int64_t m, i;
uint64_t v;
(void)v;
for (m = 0; m < t->g->m_cycles; m++) {
for (i = 0; i < ONE_MILLION; i++) {
my_rdtsc(v);
t->count++;
}
}
}
void
test_rdtsc(struct targ *t)
{
int64_t m, i;
volatile uint64_t v;
for (m = 0; m < t->g->m_cycles; m++) {
for (i = 0; i < ONE_MILLION; i++) {
v = rdtsc();
t->count++;
}
}
(void)v;
}
void
test_add(struct targ *t)
{
int64_t m, i;
for (m = 0; m < t->g->m_cycles; m++) {
for (i = 0; i < ONE_MILLION; i++) {
t->glob_ctr[0] ++;
t->count++;
}
}
}
void
test_atomic_add(struct targ *t)
{
int64_t m, i;
for (m = 0; m < t->g->m_cycles; m++) {
for (i = 0; i < ONE_MILLION; i++) {
atomic_add_int(t->glob_ctr, 1);
t->count++;
}
}
}
void
test_atomic_cmpset(struct targ *t)
{
int64_t m, i;
for (m = 0; m < t->g->m_cycles; m++) {
for (i = 0; i < ONE_MILLION; i++) {
atomic_cmpset_32(t->glob_ctr, m, i);
t->count++;
}
}
}
void
test_time(struct targ *t)
{
int64_t m;
for (m = 0; m < t->g->m_cycles; m++) {
#ifndef __APPLE__
struct timespec ts;
clock_gettime(t->g->arg, &ts);
#endif
t->count++;
}
}
void
test_gettimeofday(struct targ *t)
{
int64_t m;
struct timeval ts;
for (m = 0; m < t->g->m_cycles; m++) {
gettimeofday(&ts, NULL);
t->count++;
}
}
/*
* getppid is the simplest system call (getpid is cached by glibc
* so it would not be a good test)
*/
void
test_getpid(struct targ *t)
{
int64_t m;
for (m = 0; m < t->g->m_cycles; m++) {
getppid();
t->count++;
}
}
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
static void
fast_bcopy(void *_src, void *_dst, int l)
{
uint64_t *src = _src;
uint64_t *dst = _dst;
if (unlikely(l >= 1024)) {
bcopy(src, dst, l);
return;
}
for (; likely(l > 0); l-=64) {
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
static inline void
asmcopy(void *dst, void *src, uint64_t l)
{
(void)dst;
(void)src;
asm(
"\n\t"
"movq %0, %%rcx\n\t"
"addq $7, %%rcx\n\t"
"shrq $03, %%rcx\n\t"
"cld\n\t"
"movq %1, %%rdi\n\t"
"movq %2, %%rsi\n\t"
"repe movsq\n\t"
/* "movq %0, %%rcx\n\t"
"andq $0x7, %%rcx\n\t"
"repe movsb\n\t"
*/
: /* out */
: "r" (l), "r" (dst), "r" (src) /* in */
: "%rcx", "%rsi", "%rdi" /* clobbered */
);
}
// XXX if you want to make sure there is no inlining...
// static void (*fp)(void *_src, void *_dst, int l) = fast_bcopy;
#define HU 0x3ffff
static struct glob_arg huge[HU+1];
void
test_fastcopy(struct targ *t)
{
int64_t m;
int len = t->g->arg;
if (len > (int)sizeof(struct glob_arg))
len = sizeof(struct glob_arg);
D("fast copying %d bytes", len);
for (m = 0; m < t->g->m_cycles; m++) {
fast_bcopy(t->g, (void *)&huge[m & HU], len);
t->count+=1;
}
}
void
test_asmcopy(struct targ *t)
{
int64_t m;
int len = t->g->arg;
if (len > (int)sizeof(struct glob_arg))
len = sizeof(struct glob_arg);
D("fast copying %d bytes", len);
for (m = 0; m < t->g->m_cycles; m++) {
asmcopy((void *)&huge[m & HU], t->g, len);
t->count+=1;
}
}
void
test_bcopy(struct targ *t)
{
int64_t m;
int len = t->g->arg;
if (len > (int)sizeof(struct glob_arg))
len = sizeof(struct glob_arg);
D("bcopying %d bytes", len);
for (m = 0; m < t->g->m_cycles; m++) {
bcopy(t->g, (void *)&huge[m & HU], len);
t->count+=1;
}
}
void
test_builtin_memcpy(struct targ *t)
{
int64_t m;
int len = t->g->arg;
if (len > (int)sizeof(struct glob_arg))
len = sizeof(struct glob_arg);
D("bcopying %d bytes", len);
for (m = 0; m < t->g->m_cycles; m++) {
__builtin_memcpy((void *)&huge[m & HU], t->g, len);
t->count+=1;
}
}
void
test_memcpy(struct targ *t)
{
int64_t m;
int len = t->g->arg;
if (len > (int)sizeof(struct glob_arg))
len = sizeof(struct glob_arg);
D("memcopying %d bytes", len);
for (m = 0; m < t->g->m_cycles; m++) {
memcpy((void *)&huge[m & HU], t->g, len);
t->count+=1;
}
}
#include <sys/ioctl.h>
#include <sys/socket.h> // OSX
#include <net/if.h>
#include <net/netmap.h>
void
test_netmap(struct targ *t)
{
struct nmreq nmr;
int fd;
int64_t m, scale;
scale = t->g->m_cycles / 100;
fd = open("/dev/netmap", O_RDWR);
if (fd < 0) {
D("fail to open netmap, exit");
return;
}
bzero(&nmr, sizeof(nmr));
for (m = 0; m < t->g->m_cycles; m += scale) {
nmr.nr_version = 666;
nmr.nr_cmd = t->g->arg;
nmr.nr_offset = (uint32_t)scale;
ioctl(fd, NIOCGINFO, &nmr);
t->count += scale;
}
return;
}
struct entry {
void (*fn)(struct targ *);
char *name;
uint64_t scale;
uint64_t m_cycles;
};
struct entry tests[] = {
{ test_sel, "select", 1, 1000 },
{ test_poll, "poll", 1, 1000 },
{ test_usleep, "usleep", 1, 1000 },
{ test_time, "time", 1, 1000 },
{ test_gettimeofday, "gettimeofday", 1, 1000000 },
{ test_getpid, "getpid", 1, 1000000 },
{ test_bcopy, "bcopy", 1000, 100000000 },
{ test_builtin_memcpy, "__builtin_memcpy", 1000, 100000000 },
{ test_memcpy, "memcpy", 1000, 100000000 },
{ test_fastcopy, "fastcopy", 1000, 100000000 },
{ test_asmcopy, "asmcopy", 1000, 100000000 },
{ test_add, "add", ONE_MILLION, 100000000 },
{ test_nop, "nop", ONE_MILLION, 100000000 },
{ test_atomic_add, "atomic-add", ONE_MILLION, 100000000 },
{ test_cli, "cli", ONE_MILLION, 100000000 },
{ test_rdtsc, "rdtsc", ONE_MILLION, 100000000 }, // unserialized
{ test_rdtsc1, "rdtsc1", ONE_MILLION, 100000000 }, // serialized
{ test_atomic_cmpset, "cmpset", ONE_MILLION, 100000000 },
{ test_netmap, "netmap", 1000, 100000000 },
{ NULL, NULL, 0, 0 }
};
static void
usage(void)
{
const char *cmd = "test";
int i;
fprintf(stderr,
"Usage:\n"
"%s arguments\n"
"\t-m name test name\n"
"\t-n cycles (millions) of cycles\n"
"\t-l arg bytes, usec, ... \n"
"\t-t threads total threads\n"
"\t-c cores cores to use\n"
"\t-a n force affinity every n cores\n"
"\t-A n cache contention every n bytes\n"
"\t-w report_ms milliseconds between reports\n"
"",
cmd);
fprintf(stderr, "Available tests:\n");
for (i = 0; tests[i].name; i++) {
fprintf(stderr, "%12s\n", tests[i].name);
}
exit(0);
}
static int64_t
getnum(const char *s)
{
int64_t n;
char *e;
n = strtol(s, &e, 0);
switch (e ? *e : '\0') {
case 'k':
case 'K':
return n*1000;
case 'm':
case 'M':
return n*1000*1000;
case 'g':
case 'G':
return n*1000*1000*1000;
case 't':
case 'T':
return n*1000*1000*1000*1000;
default:
return n;
}
}
struct glob_arg g;
int
main(int argc, char **argv)
{
int i, ch, report_interval, affinity, align;
ND("g has size %d", (int)sizeof(g));
report_interval = 250; /* ms */
affinity = 0; /* no affinity */
align = 0; /* global variable */
bzero(&g, sizeof(g));
g.privs = getprivs();
g.nthreads = 1;
g.cpus = 1;
g.m_cycles = 0;
g.nullfd = open("/dev/zero", O_RDWR);
D("nullfd is %d", g.nullfd);
while ( (ch = getopt(argc, argv, "A:a:m:n:w:c:t:vl:")) != -1) {
switch(ch) {
default:
D("bad option %c %s", ch, optarg);
usage();
break;
case 'A': /* align */
align = atoi(optarg);
break;
case 'a': /* force affinity */
affinity = atoi(optarg);
break;
case 'n': /* cycles */
g.m_cycles = getnum(optarg);
break;
case 'w': /* report interval */
report_interval = atoi(optarg);
break;
case 'c':
g.cpus = atoi(optarg);
break;
case 't':
g.nthreads = atoi(optarg);
break;
case 'm':
g.test_name = optarg;
break;
case 'l':
g.arg = getnum(optarg);
break;
case 'v':
verbose++;
break;
}
}
argc -= optind;
argv += optind;
if (!g.test_name && argc > 0)
g.test_name = argv[0];
if (g.test_name) {
for (i = 0; tests[i].name; i++) {
if (!strcmp(g.test_name, tests[i].name)) {
g.fn = tests[i].fn;
g.scale = tests[i].scale;
if (g.m_cycles == 0)
g.m_cycles = tests[i].m_cycles;
if (g.scale == ONE_MILLION)
g.scale_name = "M";
else if (g.scale == 1000)
g.scale_name = "K";
else {
g.scale = 1;
g.scale_name = "";
}
break;
}
}
}
if (!g.fn) {
D("%s", "missing/unknown test name");
usage();
}
i = system_ncpus();
if (g.cpus < 0 || g.cpus > i) {
D("%d cpus is too high, have only %d cpus", g.cpus, i);
usage();
}
if (g.cpus == 0)
g.cpus = i;
if (g.nthreads < 1) {
D("bad nthreads %d, using 1", g.nthreads);
g.nthreads = 1;
}
i = sizeof(g.v.ctr) / g.nthreads; /* max per-thread offset, in bytes */
if (align < 0 || align > i) {
D("bad align %d, max is %d", align, i);
align = i;
}
/* Install ^C handler. */
global_nthreads = g.nthreads;
signal(SIGINT, sigint_h);
ta = calloc(g.nthreads, sizeof(*ta));
/*
* Now create the desired number of threads, each one
* using a single descriptor.
*/
D("start %d threads on %d cores", g.nthreads, g.cpus);
for (i = 0; i < g.nthreads; i++) {
struct targ *t = &ta[i];
bzero(t, sizeof(*t));
t->g = &g;
t->me = i;
t->glob_ctr = &g.v.ctr[(i*align)/sizeof(g.v.ctr[0])];
D("thread %d ptr %p", i, t->glob_ctr);
t->affinity = affinity ? (affinity*i) % g.cpus : -1;
if (pthread_create(&t->thread, NULL, td_body, t) == -1) {
D("Unable to create thread %d", i);
t->completed = 1;
}
}
/* the main loop */
{
uint64_t my_count = 0, prev = 0;
uint64_t count = 0;
double delta_t;
struct timeval tic, toc;
gettimeofday(&toc, NULL);
for (;;) {
struct timeval now, delta;
uint64_t pps;
int done = 0;
delta.tv_sec = report_interval/1000;
delta.tv_usec = (report_interval%1000)*1000;
select(0, NULL, NULL, NULL, &delta);
gettimeofday(&now, NULL);
timersub(&now, &toc, &toc);
my_count = 0;
for (i = 0; i < g.nthreads; i++) {
my_count += ta[i].count;
if (ta[i].completed)
done++;
}
pps = toc.tv_sec* ONE_MILLION + toc.tv_usec;
if (pps < 10000)
continue;
pps = (my_count - prev)*ONE_MILLION / pps;
D("%" PRIu64 " %scycles/s scale %" PRIu64 " in %dus", pps/g.scale,
g.scale_name, g.scale, (int)(toc.tv_sec* ONE_MILLION + toc.tv_usec));
prev = my_count;
toc = now;
if (done == g.nthreads)
break;
}
D("total %" PRIu64 " cycles", prev);
timerclear(&tic);
timerclear(&toc);
for (i = 0; i < g.nthreads; i++) {
pthread_join(ta[i].thread, NULL);
if (ta[i].completed == 0)
continue;
/*
* Collect threads output and extract information about
* how long it took to send all the packets.
*/
count += ta[i].count;
if (!timerisset(&tic) || timercmp(&ta[i].tic, &tic, <))
tic = ta[i].tic;
if (!timerisset(&toc) || timercmp(&ta[i].toc, &toc, >))
toc = ta[i].toc;
}
/* print output. */
timersub(&toc, &tic, &toc);
delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
D("total %8.6f seconds", delta_t);
}
return (0);
}
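/*
 * Example (added note, not in the original file): a typical run is
 *	./testlock -m memcpy -l 512 -t 2 -c 2
 * which starts two threads on two cores, each timing 512-byte memcpy()
 * calls, and prints a rate report every 250 ms (the default -w).
 */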
/* end of file */

File diff suppressed because it is too large


@ -1,172 +0,0 @@
/*
* Copyright (C) 2013-2014 Michio Honda. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* $FreeBSD$ */
#include <errno.h>
#include <stdio.h>
#include <inttypes.h> /* PRI* macros */
#include <string.h> /* strcmp */
#include <fcntl.h> /* open */
#include <unistd.h> /* close */
#include <sys/ioctl.h> /* ioctl */
#include <sys/param.h>
#include <sys/socket.h> /* apple needs sockaddr */
#include <net/if.h> /* ifreq */
#include <net/netmap.h>
#include <net/netmap_user.h>
#include <libgen.h> /* basename */
/* debug support */
#define ND(format, ...) do {} while(0)
#define D(format, ...) \
fprintf(stderr, "%s [%d] " format "\n", \
__FUNCTION__, __LINE__, ##__VA_ARGS__)
static int
bdg_ctl(const char *name, int nr_cmd, int nr_arg)
{
struct nmreq nmr;
int error = 0;
int fd = open("/dev/netmap", O_RDWR);
if (fd == -1) {
D("Unable to open /dev/netmap");
return -1;
}
bzero(&nmr, sizeof(nmr));
nmr.nr_version = NETMAP_API;
if (name != NULL) /* might be NULL */
strncpy(nmr.nr_name, name, sizeof(nmr.nr_name));
nmr.nr_cmd = nr_cmd;
switch (nr_cmd) {
case NETMAP_BDG_ATTACH:
case NETMAP_BDG_DETACH:
if (nr_arg && nr_arg != NETMAP_BDG_HOST)
nr_arg = 0;
nmr.nr_arg1 = nr_arg;
error = ioctl(fd, NIOCREGIF, &nmr);
if (error == -1) {
ND("Unable to %s %s to the bridge", nr_cmd ==
NETMAP_BDG_DETACH?"detach":"attach", name);
perror(name);
} else
ND("Success to %s %s to the bridge", nr_cmd ==
NETMAP_BDG_DETACH?"detach":"attach", name);
break;
case NETMAP_BDG_LIST:
if (strlen(nmr.nr_name)) { /* name to bridge/port info */
error = ioctl(fd, NIOCGINFO, &nmr);
if (error) {
ND("Unable to obtain info for %s", name);
perror(name);
} else
D("%s at bridge:%d port:%d", name, nmr.nr_arg1,
nmr.nr_arg2);
break;
}
/* scan all the bridges and ports */
nmr.nr_arg1 = nmr.nr_arg2 = 0;
for (; !ioctl(fd, NIOCGINFO, &nmr); nmr.nr_arg2++) {
D("bridge:%d port:%d %s", nmr.nr_arg1, nmr.nr_arg2,
nmr.nr_name);
nmr.nr_name[0] = '\0';
}
break;
default: /* GINFO */
nmr.nr_cmd = nmr.nr_arg1 = nmr.nr_arg2 = 0;
error = ioctl(fd, NIOCGINFO, &nmr);
if (error) {
ND("Unable to get if info for %s", name);
perror(name);
} else
D("%s: %d queues.", name, nmr.nr_rx_rings);
break;
}
close(fd);
return error;
}
int
main(int argc, char *argv[])
{
int ch, nr_cmd = 0, nr_arg = 0;
const char *command = basename(argv[0]);
char *name = NULL;
if (argc > 3) {
usage:
fprintf(stderr,
"Usage:\n"
"%s arguments\n"
"\t-g interface interface name to get info\n"
"\t-d interface interface name to be detached\n"
"\t-a interface interface name to be attached\n"
"\t-h interface interface name to be attached with the host stack\n"
"\t-l list all or specified bridge's interfaces (default)\n"
"", command);
return 0;
}
while ((ch = getopt(argc, argv, "d:a:h:g:l")) != -1) {
name = optarg; /* default */
switch (ch) {
default:
fprintf(stderr, "bad option %c %s", ch, optarg);
goto usage;
case 'd':
nr_cmd = NETMAP_BDG_DETACH;
break;
case 'a':
nr_cmd = NETMAP_BDG_ATTACH;
break;
case 'h':
nr_cmd = NETMAP_BDG_ATTACH;
nr_arg = NETMAP_BDG_HOST;
break;
case 'g':
nr_cmd = 0;
break;
case 'l':
nr_cmd = NETMAP_BDG_LIST;
if (optind < argc && argv[optind][0] == '-')
name = NULL;
break;
}
if (optind != argc) {
// fprintf(stderr, "optind %d argc %d\n", optind, argc);
goto usage;
}
}
if (argc == 1)
nr_cmd = NETMAP_BDG_LIST;
return bdg_ctl(name, nr_cmd, nr_arg) ? 1 : 0;
}
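/*
 * Example session (added note; switch and interface names are
 * illustrative): attach a NIC with "vale-ctl -a vale0:em1", list
 * bridges and ports with "vale-ctl -l", detach with
 * "vale-ctl -d vale0:em1".
 */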


@ -1,95 +0,0 @@
diff --git a/src/PktSrc.cc b/src/PktSrc.cc
index 9d6bce6..e8f59dd 100644
--- a/src/PktSrc.cc
+++ b/src/PktSrc.cc
@@ -11,6 +11,26 @@
#include "Net.h"
#include "Sessions.h"
+#define HAVE_NETMAP
+
+#ifdef HAVE_NETMAP
+
+// Compile in netmap support. If the interface name starts with
+// "netmap:" or "vale" we use a netmap fd instead of pcap, and bind
+// one or all rings depending on NETMAP_RING_ID environment variable.
+//
+// For a test run you can use the vale switch,
+// pkt-gen -i vale1:b -f tx -R ..rate_in_pps
+// and launch bro like this
+/*
+
+BROPATH=`./bro-path-dev` ./src/bro -i vale1:a -b -e 'global l=0; event p(){local s=net_stats(); local c=s$pkts_recvd;print c-l;l=c; schedule 1 sec {p()};} event bro_init(){event p();}'
+
+ */
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+
+#endif /* HAVE_NETMAP */
// ### This needs auto-confing.
#ifdef HAVE_PCAP_INT_H
@@ -75,7 +95,14 @@ int PktSrc::ExtractNextPacket()
return 0;
}
+#ifdef HAVE_NETMAP
+ // in netmap mode call netmap equivalent of pcap_next()
+ if (IS_NETMAP_DESC(pd))
+ data = last_data = nm_nextpkt((struct nm_desc *)pd,
+ (struct nm_pkthdr *)&hdr);
+ else
+#endif /* HAVE_NETMAP */
data = last_data = pcap_next(pd, &hdr);
if ( data && (hdr.len == 0 || hdr.caplen == 0) )
{
@@ -407,6 +435,11 @@ void PktSrc::Close()
{
if ( pd )
{
+#ifdef HAVE_NETMAP
+ if (IS_NETMAP_DESC(pd))
+ nm_close((struct nm_desc *)pd);
+ else
+#endif /* HAVE_NETMAP */
pcap_close(pd);
pd = 0;
closed = true;
@@ -443,6 +476,14 @@ void PktSrc::Statistics(Stats* s)
else
{
struct pcap_stat pstat;
+#ifdef HAVE_NETMAP
+ if (IS_NETMAP_DESC(pd))
+ {
+ s->dropped = stats.dropped;
+ s->link = stats.received;
+ }
+ else
+#endif /* HAVE_NETMAP */
if ( pcap_stats(pd, &pstat) < 0 )
{
reporter->Error("problem getting packet filter statistics: %s",
@@ -482,6 +523,21 @@ PktInterfaceSrc::PktInterfaceSrc(const char* arg_interface, const char* filter,
interface = copy_string(arg_interface);
+#ifdef HAVE_NETMAP
+ pd = (pcap_t *)nm_open(interface, getenv("NETMAP_RING_ID"), 0, 0);
+ // netmap interfaces are named netmap:* or vale*
+ // If pd == 0 && errno == 0 "interface" is not a valid
+ // netmap interface name, so we fall through to pcap
+ if (pd || errno > 0)
+ {
+ if (pd)
+ selectable_fd = NETMAP_FD(pd);
+ else
+ closed = true;
+ return;
+ }
+#endif /* HAVE_NETMAP */
+
// Determine network and netmask.
uint32 net;
if ( pcap_lookupnet(interface, &net, &netmask, tmp_errbuf) < 0 )


@ -1,389 +0,0 @@
diff --git a/Makefile.in b/Makefile.in
index 9995458..c670d66 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -83,7 +83,7 @@ YACC = @V_YACC@
@rm -f $@
$(CC) $(FULL_CFLAGS) -c $(srcdir)/$*.c
-PSRC = pcap-@V_PCAP@.c @USB_SRC@ @BT_SRC@ @CAN_SRC@ @NETFILTER_SRC@ @CANUSB_SRC@ @DBUS_SRC@
+PSRC = pcap-@V_PCAP@.c @USB_SRC@ @BT_SRC@ @CAN_SRC@ @NETFILTER_SRC@ @CANUSB_SRC@ @DBUS_SRC@ @NETMAP_SRC@
FSRC = fad-@V_FINDALLDEVS@.c
SSRC = @SSRC@
CSRC = pcap.c inet.c gencode.c optimize.c nametoaddr.c etherent.c \
@@ -313,6 +313,7 @@ EXTRA_DIST = \
pcap-namedb.h \
pcap-netfilter-linux.c \
pcap-netfilter-linux.h \
+ pcap-netmap.c \
pcap-nit.c \
pcap-null.c \
pcap-pf.c \
diff --git a/config.h.in b/config.h.in
index c6bc68e..09c8557 100644
--- a/config.h.in
+++ b/config.h.in
@@ -268,6 +268,9 @@
/* target host supports netfilter sniffing */
#undef PCAP_SUPPORT_NETFILTER
+/* target host supports netmap */
+#undef PCAP_SUPPORT_NETMAP
+
/* target host supports USB sniffing */
#undef PCAP_SUPPORT_USB
diff --git a/configure b/configure
index be87668..a8d0cae 100755
--- a/configure
+++ b/configure
@@ -626,6 +626,8 @@ INSTALL_PROGRAM
DBUS_SRC
PCAP_SUPPORT_DBUS
PKGCONFIG
+NETMAP_SRC
+PCAP_SUPPORT_NETMAP
CAN_SRC
PCAP_SUPPORT_CAN
CANUSB_SRC
@@ -747,6 +749,7 @@ enable_shared
enable_bluetooth
enable_canusb
enable_can
+enable_netmap
enable_dbus
'
ac_precious_vars='build_alias
@@ -1385,6 +1388,8 @@ Optional Features:
available]
--enable-can enable CAN support [default=yes, if support
available]
+ --enable-netmap enable netmap support [default=yes, if support
+ available]
--enable-dbus enable D-Bus capture support [default=yes, if
support available]
@@ -8148,6 +8153,39 @@ $as_echo "$as_me: no CAN sniffing support implemented for $host_os" >&6;}
fi
+# Check whether --enable-netmap was given.
+if test "${enable_netmap+set}" = set; then :
+ enableval=$enable_netmap;
+else
+ enable_netmap=yes
+fi
+
+
+if test "x$enable_netmap" != "xno" ; then
+ case "$host_os" in
+ *)
+ ac_fn_c_check_header_compile "$LINENO" "net/netmap_user.h" "ac_cv_header_net_netmap_user_h" "#include <net/netmap_user.h>
+
+"
+if test "x$ac_cv_header_net_netmap_user_h" = xyes; then :
+
+$as_echo "#define PCAP_SUPPORT_NETMAP 1" >>confdefs.h
+
+ NETMAP_SRC=pcap-netmap.c
+ { $as_echo "$as_me:${as_lineno-$LINENO}: netmap is supported" >&5
+$as_echo "$as_me: netmap is supported" >&6;}
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: netmap is not supported" >&5
+$as_echo "$as_me: netmap is not supported" >&6;}
+fi
+
+
+ ;;
+ esac
+
+
+fi
+
# Check whether --enable-dbus was given.
if test "${enable_dbus+set}" = set; then :
enableval=$enable_dbus;
diff --git a/configure.in b/configure.in
index f0aa2c5..55464ba 100644
--- a/configure.in
+++ b/configure.in
@@ -1550,6 +1550,28 @@ if test "x$enable_can" != "xno" ; then
AC_SUBST(CAN_SRC)
fi
+AC_ARG_ENABLE([netmap],
+[AC_HELP_STRING([--enable-netmap],[enable netmap support @<:@default=yes, if support available@:>@])],
+ [],
+ [enable_netmap=yes])
+
+if test "x$enable_netmap" != "xno" ; then
+ dnl check for netmap support
+ case "$host_os" in
+ *)
+ AC_CHECK_HEADER(net/netmap_user.h,
+ [ AC_DEFINE(PCAP_SUPPORT_NETMAP, 1, [target host supports netmap])
+ NETMAP_SRC=pcap-netmap.c
+ AC_MSG_NOTICE(netmap is supported)],
+ AC_MSG_NOTICE(netmap is not supported),
+ [#include <net/netmap_user.h>]
+ )
+ ;;
+ esac
+ AC_SUBST(PCAP_SUPPORT_NETMAP)
+ AC_SUBST(NETMAP_SRC)
+fi
+
AC_ARG_ENABLE([dbus],
[AC_HELP_STRING([--enable-dbus],[enable D-Bus capture support @<:@default=yes, if support available@:>@])],
[],
diff --git a/inet.c b/inet.c
index c699658..d132507 100644
--- a/inet.c
+++ b/inet.c
@@ -883,6 +883,10 @@ pcap_lookupnet(device, netp, maskp, errbuf)
#ifdef PCAP_SUPPORT_USB
|| strstr(device, "usbmon") != NULL
#endif
+#ifdef PCAP_SUPPORT_NETMAP
+ || !strncmp(device, "netmap:", 7)
+ || !strncmp(device, "vale", 4)
+#endif
#ifdef HAVE_SNF_API
|| strstr(device, "snf") != NULL
#endif
diff --git a/pcap-netmap.c b/pcap-netmap.c
new file mode 100644
index 0000000..2568c2f
--- /dev/null
+++ b/pcap-netmap.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2014 Universita` di Pisa
+ *
+ * packet filter subroutines for netmap
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <poll.h>
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+
+#include "pcap-int.h"
+
+#if defined (linux)
+/* On FreeBSD we use IFF_PPROMISC which is in ifr_flagshigh.
+ * remap to IFF_PROMISC on linux
+ */
+#define IFF_PPROMISC IFF_PROMISC
+#define ifr_flagshigh ifr_flags
+#endif /* linux */
+
+struct pcap_netmap {
+ struct nm_desc *d; /* pointer returned by nm_open() */
+ pcap_handler cb; /* callback and argument */
+ u_char *cb_arg;
+ int must_clear_promisc; /* flag */
+ uint64_t rx_pkts; /* count of packets received before the filter */
+};
+
+static int
+pcap_netmap_stats(pcap_t *p, struct pcap_stat *ps)
+{
+ struct pcap_netmap *pn = p->priv;
+
+ ps->ps_recv = pn->rx_pkts;
+ ps->ps_drop = 0;
+ ps->ps_ifdrop = 0;
+ return 0;
+}
+
+static void
+pcap_netmap_filter(u_char *arg, struct pcap_pkthdr *h, const u_char *buf)
+{
+ pcap_t *p = (pcap_t *)arg;
+ struct pcap_netmap *pn = p->priv;
+
+ ++pn->rx_pkts;
+ if (bpf_filter(p->fcode.bf_insns, buf, h->len, h->caplen))
+ pn->cb(pn->cb_arg, h, buf);
+}
+
+static int
+pcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user)
+{
+ int ret;
+ struct pcap_netmap *pn = p->priv;
+ struct nm_desc *d = pn->d;
+ struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 };
+
+ pn->cb = cb;
+ pn->cb_arg = user;
+
+ for (;;) {
+ if (p->break_loop) {
+ p->break_loop = 0;
+ return PCAP_ERROR_BREAK;
+ }
+ /* nm_dispatch won't run forever */
+ ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter, (void *)p);
+ if (ret != 0)
+ break;
+ poll(&pfd, 1, p->opt.timeout);
+ }
+ return ret;
+}
+
+/* XXX need to check the NIOCTXSYNC/poll */
+static int
+pcap_netmap_inject(pcap_t *p, const void *buf, size_t size)
+{
+ struct nm_desc *d = ((struct pcap_netmap *)p->priv)->d;
+
+ return nm_inject(d, buf, size);
+}
+
+static int
+pcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags)
+{
+ struct pcap_netmap *pn = p->priv;
+ struct nm_desc *d = pn->d;
+ struct ifreq ifr;
+ int error, fd = d->fd;
+
+#ifdef linux
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ fprintf(stderr, "Error: cannot get device control socket.\n");
+ return -1;
+ }
+#endif /* linux */
+ bzero(&ifr, sizeof(ifr));
+ strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name));
+ switch (what) {
+ case SIOCSIFFLAGS:
+ ifr.ifr_flags = *if_flags;
+ ifr.ifr_flagshigh = *if_flags >> 16;
+ break;
+ }
+ error = ioctl(fd, what, &ifr);
+ fprintf(stderr, "%s %s ioctl 0x%lx returns %d\n", __FUNCTION__,
+ d->req.nr_name, what, error);
+ if (error)
+ return -1;
+ switch (what) {
+ case SIOCGIFFLAGS:
+ *if_flags = ifr.ifr_flags | (ifr.ifr_flagshigh << 16);
+ }
+ return 0;
+}
+
+static void
+pcap_netmap_close(pcap_t *p)
+{
+ struct pcap_netmap *pn = p->priv;
+ struct nm_desc *d = pn->d;
+ uint32_t if_flags = 0;
+
+ if (pn->must_clear_promisc) {
+ pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
+ if (if_flags & IFF_PPROMISC) {
+ if_flags &= ~IFF_PPROMISC;
+ pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
+ }
+ }
+ nm_close(d);
+}
+
+static int
+pcap_netmap_activate(pcap_t *p)
+{
+ struct pcap_netmap *pn = p->priv;
+ struct nm_desc *d = nm_open(p->opt.source, NULL, 0, NULL);
+ uint32_t if_flags = 0;
+
+ if (d == NULL) {
+ snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
+ "netmap open: cannot access %s: %s\n",
+ p->opt.source, pcap_strerror(errno));
+ goto bad;
+ }
+ fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n",
+ __FUNCTION__, p->opt.source, d, d->fd, d->first_rx_ring, d->last_rx_ring);
+ pn->d = d;
+ p->fd = d->fd;
+ if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) {
+ pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */
+ if (!(if_flags & IFF_PPROMISC)) {
+ pn->must_clear_promisc = 1;
+ if_flags |= IFF_PPROMISC;
+ pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags);
+ }
+ }
+ p->linktype = DLT_EN10MB;
+ p->selectable_fd = p->fd;
+ p->read_op = pcap_netmap_dispatch;
+ p->inject_op = pcap_netmap_inject;
+ p->setfilter_op = install_bpf_program;
+ p->setdirection_op = NULL;
+ p->set_datalink_op = NULL;
+ p->getnonblock_op = pcap_getnonblock_fd;
+ p->setnonblock_op = pcap_setnonblock_fd;
+ p->stats_op = pcap_netmap_stats;
+ p->cleanup_op = pcap_netmap_close;
+ return (0);
+
+ bad:
+ pcap_cleanup_live_common(p);
+ return (PCAP_ERROR);
+}
+
+pcap_t *
+pcap_netmap_create(const char *device, char *ebuf, int *is_ours)
+{
+ pcap_t *p;
+
+ *is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4));
+ if (! *is_ours)
+ return NULL;
+ p = pcap_create_common(device, ebuf, sizeof (struct pcap_netmap));
+ if (p == NULL)
+ return (NULL);
+ p->activate_op = pcap_netmap_activate;
+ return (p);
+}
diff --git a/pcap.c b/pcap.c
index b2b5da6..beda714 100644
--- a/pcap.c
+++ b/pcap.c
@@ -104,6 +104,10 @@
#include "pcap-dbus.h"
#endif
+#ifdef PCAP_SUPPORT_NETMAP
+pcap_t* pcap_netmap_create(const char *device, char *ebuf, int *is_ours);
+#endif
+
int
pcap_not_initialized(pcap_t *pcap _U_)
{
@@ -307,6 +311,9 @@ struct capture_source_type {
int (*findalldevs_op)(pcap_if_t **, char *);
pcap_t *(*create_op)(const char *, char *, int *);
} capture_source_types[] = {
+#ifdef PCAP_SUPPORT_NETMAP
+ { NULL, pcap_netmap_create },
+#endif
#ifdef HAVE_DAG_API
{ dag_findalldevs, dag_create },
#endif
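With this patch applied, an unmodified pcap application can capture from
a netmap port simply by naming the device "netmap:..." or "vale...". A
minimal sketch follows (the device name is illustrative; only standard
libpcap calls are used):

#include <stdio.h>
#include <pcap.h>

static void
dump_cb(u_char *arg, const struct pcap_pkthdr *h, const u_char *data)
{
	(void)arg; (void)data;
	printf("packet: %u bytes\n", h->caplen);
}

int
main(void)
{
	char errbuf[PCAP_ERRBUF_SIZE];
	pcap_t *p = pcap_create("netmap:em1", errbuf);	/* routed to pcap_netmap_create() */

	if (p == NULL || pcap_activate(p) != 0) {
		fprintf(stderr, "open failed: %s\n", p ? pcap_geterr(p) : errbuf);
		return 1;
	}
	pcap_loop(p, 10, dump_cb, NULL);	/* handle ten packets */
	pcap_close(p);
	return 0;
}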

File diff suppressed because it is too large


@ -1,334 +0,0 @@
/*
* Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 238985 2012-08-02 11:59:43Z luigi $
*
* netmap support for: em.
*
* For more details on netmap support please see ixgbe_netmap.h
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
// XXX do we need to block/unblock the tasks ?
static void
em_netmap_block_tasks(struct adapter *adapter)
{
if (adapter->msix > 1) { /* MSIX */
int i;
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
taskqueue_block(txr->tq);
taskqueue_drain(txr->tq, &txr->tx_task);
taskqueue_block(rxr->tq);
taskqueue_drain(rxr->tq, &rxr->rx_task);
}
} else { /* legacy */
taskqueue_block(adapter->tq);
taskqueue_drain(adapter->tq, &adapter->link_task);
taskqueue_drain(adapter->tq, &adapter->que_task);
}
}
static void
em_netmap_unblock_tasks(struct adapter *adapter)
{
if (adapter->msix > 1) {
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
int i;
for (i = 0; i < adapter->num_queues; i++) {
taskqueue_unblock(txr->tq);
taskqueue_unblock(rxr->tq);
}
} else { /* legacy */
taskqueue_unblock(adapter->tq);
}
}
/*
* Register/unregister. We are already under netmap lock.
*/
static int
em_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
em_netmap_block_tasks(adapter);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
em_init_locked(adapter); /* also enable intr */
em_netmap_unblock_tasks(adapter);
EM_CORE_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
em_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
struct em_buffer *txbuf = &txr->tx_buffers[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(paddr);
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->upper.data = 0;
curr->lower.data = htole32(adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
if (nic_i != txr->next_to_clean) {
txr->next_to_clean = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
em_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) { // XXX no need to count
struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->status);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->length);
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
/* make sure next_to_refresh follows next_to_check */
rxr->next_to_refresh = nic_i; // XXX
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_check = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
curr->buffer_addr = htole64(paddr);
netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
static void
em_netmap_attach(struct adapter *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
na.num_tx_desc = adapter->num_tx_desc;
na.num_rx_desc = adapter->num_rx_desc;
na.nm_txsync = em_netmap_txsync;
na.nm_rxsync = em_netmap_rxsync;
na.nm_register = em_netmap_reg;
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
netmap_attach(&na);
}
/* end of file */


@ -1,314 +0,0 @@
/*
* Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/dev/netmap/if_igb_netmap.h 256200 2013-10-09 17:32:52Z jfv $
*
* Netmap support for igb, partly contributed by Ahmed Kooli
* For details on netmap support please see ixgbe_netmap.h
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
/*
* Adaptation to different versions of the driver.
*/
#ifndef IGB_MEDIA_RESET
/* at the same time as IGB_MEDIA_RESET was defined, the
* tx buffer descriptor was renamed, so use this to revert
* back to the old name.
*/
#define igb_tx_buf igb_tx_buffer
#endif
/*
* Register/unregister. We are already under netmap lock.
*/
static int
igb_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
IGB_CORE_LOCK(adapter);
igb_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
igb_init_locked(adapter); /* also enable intr */
IGB_CORE_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
igb_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
/* 82575 needs the queue index added */
u32 olinfo_status =
(adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
union e1000_adv_tx_desc *curr =
(union e1000_adv_tx_desc *)&txr->tx_base[nic_i];
struct igb_tx_buf *txbuf = &txr->tx_buffers[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_ADVTXD_DCMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->read.buffer_addr = htole64(paddr);
// XXX check olinfo and cmd_type_len
curr->read.olinfo_status =
htole32(olinfo_status |
(len << E1000_ADVTXD_PAYLEN_SHIFT));
curr->read.cmd_type_len =
htole32(len | E1000_ADVTXD_DTYP_DATA |
E1000_ADVTXD_DCMD_IFCS |
E1000_ADVTXD_DCMD_DEXT |
E1000_ADVTXD_DCMD_EOP | flags);
/* make sure changes to the buffer are synced */
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
/* Set the watchdog XXX ? */
txr->queue_status = IGB_QUEUE_WORKING;
txr->watchdog_time = ticks;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
txr->next_to_clean = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
igb_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_check = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
struct igb_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->wb.upper.status_error = 0;
curr->read.pkt_addr = htole64(paddr);
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
static void
igb_netmap_attach(struct adapter *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
na.num_tx_desc = adapter->num_tx_desc;
na.num_rx_desc = adapter->num_rx_desc;
na.nm_txsync = igb_netmap_txsync;
na.nm_rxsync = igb_netmap_rxsync;
na.nm_register = igb_netmap_reg;
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
netmap_attach(&na);
}
/* end of file */
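The sync and register methods above are only half of the integration: the driver's interrupt handlers also need small hooks so that netmap can wake userspace instead of running the stock cleanup path. A minimal sketch, assuming the netmap_tx_irq()/netmap_rx_irq() helpers from netmap_kern.h (the queue field and handler name here are illustrative, not the patched driver's actual code):

	static void
	igb_que_intr_sketch(void *arg)
	{
		struct igb_queue *que = arg;
		struct ifnet *ifp = que->adapter->ifp;
		u_int work_done;

		/* a non-zero return means netmap consumed the event */
		if (netmap_tx_irq(ifp, que->msix))
			return;
		if (netmap_rx_irq(ifp, que->msix, &work_done))
			return;
		/* ... otherwise run the stock txeof/rxeof processing ... */
	}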


@ -1,311 +0,0 @@
/*
* Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/dev/netmap/if_lem_netmap.h 231881 2012-02-17 14:09:04Z luigi $
*
* netmap support for: lem
*
* For details on netmap support please see ixgbe_netmap.h
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
/*
* Register/unregister. We are already under netmap lock.
*/
static int
lem_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
EM_CORE_LOCK(adapter);
lem_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
#ifndef EM_LEGACY_IRQ // XXX do we need this ?
taskqueue_block(adapter->tq);
taskqueue_drain(adapter->tq, &adapter->rxtx_task);
taskqueue_drain(adapter->tq, &adapter->link_task);
#endif /* !EM_LEGACY_IRQ */
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
lem_init_locked(adapter); /* also enable intr */
#ifndef EM_LEGACY_IRQ
taskqueue_unblock(adapter->tq); // XXX do we need this ?
#endif /* !EM_LEGACY_IRQ */
EM_CORE_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
lem_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
while (nm_i != head) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i];
struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
curr->buffer_addr = htole64(paddr);
netmap_reload_map(adapter->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->upper.data = 0;
curr->lower.data = htole32(adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
bus_dmamap_sync(adapter->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
kring->last_reclaim = ticks;
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
adapter->next_tx_to_clean = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
lem_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = adapter->next_rx_desc_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
uint32_t staterr = le32toh(curr->status);
int len;
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
len = le16toh(curr->length) - 4; // CRC
if (len < 0) {
D("bogus pkt size %d nic idx %d", len, nic_i);
len = 0;
}
ring->slot[nm_i].len = len;
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(adapter->rxtag,
adapter->rx_buffer_area[nic_i].map,
BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
ND("%d new packets at nic %d nm %d tail %d",
n,
adapter->next_rx_desc_to_check,
netmap_idx_n2k(kring, adapter->next_rx_desc_to_check),
kring->nr_hwtail);
adapter->next_rx_desc_to_check = nic_i;
// ifp->if_ipackets += n;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
curr->buffer_addr = htole64(paddr);
netmap_reload_map(adapter->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
bus_dmamap_sync(adapter->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
static void
lem_netmap_attach(struct adapter *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
na.num_tx_desc = adapter->num_tx_desc;
na.num_rx_desc = adapter->num_rx_desc;
na.nm_txsync = lem_netmap_txsync;
na.nm_rxsync = lem_netmap_rxsync;
na.nm_register = lem_netmap_reg;
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */
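For context, these txsync/rxsync methods are driven from userspace through poll(2) on a netmap file descriptor. A minimal receive loop, assuming the nm_open()/nm_nextpkt() wrappers from net/netmap_user.h (the interface name is illustrative; error handling omitted):

	#define NETMAP_WITH_LIBS
	#include <net/netmap_user.h>
	#include <poll.h>

	static void
	rx_loop(void)
	{
		struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);
		struct pollfd pfd = { .fd = d->fd, .events = POLLIN };
		struct nm_pkthdr h;
		const u_char *buf;

		for (;;) {
			poll(&pfd, 1, -1);	/* kernel runs lem_netmap_rxsync() */
			while ((buf = nm_nextpkt(d, &h)) != NULL)
				;	/* process h.len bytes at buf */
		}
	}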


@ -1,391 +0,0 @@
/*
* Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 231881 2012-02-17 14:09:04Z luigi $
*
* netmap support for: nfe XXX not yet tested.
*
* For more details on netmap support please see ixgbe_netmap.h
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <dev/netmap/netmap_kern.h>
static int
nfe_netmap_init_buffers(struct nfe_softc *sc)
{
struct netmap_adapter *na = NA(sc->nfe_ifp);
struct netmap_slot *slot;
int i, l, n, max_avail;
struct nfe_desc32 *desc32 = NULL;
struct nfe_desc64 *desc64 = NULL;
void *addr;
uint64_t paddr;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return 0;
}
slot = netmap_reset(na, NR_TX, 0, 0);
if (!slot)
return 0; // XXX cannot happen
// XXX init the tx ring
n = NFE_TX_RING_COUNT;
for (i = 0; i < n; i++) {
l = netmap_idx_n2k(&na->tx_rings[0], i);
addr = PNMB(slot + l, &paddr);
netmap_reload_map(sc->txq.tx_data_tag,
sc->txq.data[l].tx_data_map, addr);
slot[l].flags = 0;
if (sc->nfe_flags & NFE_40BIT_ADDR) {
desc64 = &sc->txq.desc64[l];
desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
desc64->vtag = 0;
desc64->length = htole16(0);
desc64->flags = htole16(0);
} else {
desc32 = &sc->txq.desc32[l];
desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
desc32->length = htole16(0);
desc32->flags = htole16(0);
}
}
slot = netmap_reset(na, NR_RX, 0, 0);
if (!slot)
return 0; // XXX cannot happen
// XXX init the rx ring
/*
* preserve buffers still owned by the driver (and keep one empty).
*/
n = NFE_RX_RING_COUNT;
max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
for (i = 0; i < n; i++) {
uint16_t flags;
l = netmap_idx_n2k(&na->rx_rings[0], i);
addr = PNMB(slot + l, &paddr);
flags = (i < max_avail) ? NFE_RX_READY : 0;
if (sc->nfe_flags & NFE_40BIT_ADDR) {
desc64 = &sc->rxq.desc64[l];
desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
desc64->vtag = 0;
desc64->length = htole16(NETMAP_BUF_SIZE);
desc64->flags = htole16(flags);
} else {
desc32 = &sc->rxq.desc32[l];
desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
desc32->length = htole16(NETMAP_BUF_SIZE);
desc32->flags = htole16(flags);
}
netmap_reload_map(sc->rxq.rx_data_tag,
sc->rxq.data[l].rx_data_map, addr);
bus_dmamap_sync(sc->rxq.rx_data_tag,
sc->rxq.data[l].rx_data_map, BUS_DMASYNC_PREREAD);
}
return 1;
}
/*
* Register/unregister. We are already under netmap lock.
*/
static int
nfe_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct nfe_softc *sc = ifp->if_softc;
NFE_LOCK(sc);
nfe_stop(ifp); /* also clear IFF_DRV_RUNNING */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
nfe_init_locked(sc); /* also enable intr */
NFE_UNLOCK(sc);
return (0);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
nfe_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct nfe_softc *sc = ifp->if_softc;
struct nfe_desc32 *desc32 = NULL;
struct nfe_desc64 *desc64 = NULL;
bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map,
BUS_DMASYNC_POSTREAD);
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
/* slot is the current slot in the netmap ring */
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(sc->txq.tx_data_tag,
sc->txq.data[nic_i].tx_data_map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
if (sc->nfe_flags & NFE_40BIT_ADDR) {
desc64 = &sc->txq.desc64[nic_i];
desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
desc64->vtag = 0;
desc64->length = htole16(len - 1);
desc64->flags =
htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V2);
} else {
desc32 = &sc->txq.desc32[nic_i];
desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
desc32->length = htole16(len - 1);
desc32->flags =
htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V1);
}
bus_dmamap_sync(sc->txq.tx_data_tag,
sc->txq.data[nic_i].tx_data_map, BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
sc->txq.cur = nic_i;
bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* XXX something missing ? where is the last pkt marker ? */
NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl);
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
u_int nic_cur = sc->txq.cur;
nic_i = sc->txq.next;
for (n = 0; nic_i != nic_cur; n++, NFE_INC(nic_i, NFE_TX_RING_COUNT)) {
uint16_t flags;
if (sc->nfe_flags & NFE_40BIT_ADDR) {
desc64 = &sc->txq.desc64[nic_i];
flags = le16toh(desc64->flags);
} else {
desc32 = &sc->txq.desc32[nic_i];
flags = le16toh(desc32->flags);
}
if (flags & NFE_TX_VALID)
break;
}
if (n > 0) {
sc->txq.next = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
nfe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct nfe_softc *sc = ifp->if_softc;
struct nfe_desc32 *desc32;
struct nfe_desc64 *desc64;
if (head > lim)
return netmap_ring_reinit(kring);
bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t flags, len;
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = sc->rxq.cur;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
if (sc->nfe_flags & NFE_40BIT_ADDR) {
desc64 = &sc->rxq.desc64[nic_i];
flags = le16toh(desc64->flags);
len = le16toh(desc64->length) & NFE_RX_LEN_MASK;
} else {
desc32 = &sc->rxq.desc32[nic_i];
flags = le16toh(desc32->flags);
len = le16toh(desc32->length) & NFE_RX_LEN_MASK;
}
if (flags & NFE_RX_READY)
break;
ring->slot[nm_i].len = len;
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(sc->rxq.rx_data_tag,
sc->rxq.data[nic_i].rx_data_map,
BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
sc->rxq.cur = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(sc->rxq.rx_data_tag,
sc->rxq.data[nic_i].rx_data_map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
if (sc->nfe_flags & NFE_40BIT_ADDR) {
desc64 = &sc->rxq.desc64[nic_i];
desc64->physaddr[0] =
htole32(NFE_ADDR_HI(paddr));
desc64->physaddr[1] =
htole32(NFE_ADDR_LO(paddr));
desc64->length = htole16(NETMAP_BUF_SIZE);
desc64->flags = htole16(NFE_RX_READY);
} else {
desc32 = &sc->rxq.desc32[nic_i];
desc32->physaddr =
htole32(NFE_ADDR_LO(paddr));
desc32->length = htole16(NETMAP_BUF_SIZE);
desc32->flags = htole16(NFE_RX_READY);
}
bus_dmamap_sync(sc->rxq.rx_data_tag,
sc->rxq.data[nic_i].rx_data_map,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
static void
nfe_netmap_attach(struct nfe_softc *sc)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = sc->nfe_ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
na.num_tx_desc = NFE_TX_RING_COUNT;
na.num_rx_desc = NFE_RX_RING_COUNT;
na.nm_txsync = nfe_netmap_txsync;
na.nm_rxsync = nfe_netmap_rxsync;
na.nm_register = nfe_netmap_reg;
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */
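nfe_netmap_init_buffers() is meant to be called from the driver's own ring-initialization path so that, in netmap mode, the stock buffer setup is skipped. A sketch of the hook (its exact placement inside the driver is an assumption):

	/* in the driver's ring-init code, before allocating mbufs */
	if (nfe_netmap_init_buffers(sc))
		return;	/* netmap owns the buffers; skip the stock init */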


@ -1,383 +0,0 @@
/*
* Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/dev/netmap/if_re_netmap.h 234225 2012-04-13 15:33:12Z luigi $
*
* netmap support for: re
*
* For more details on netmap support please see ixgbe_netmap.h
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
/*
* Register/unregister. We are already under netmap lock.
*/
static int
re_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct rl_softc *adapter = ifp->if_softc;
RL_LOCK(adapter);
re_stop(adapter); /* also clears IFF_DRV_RUNNING */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
re_init_locked(adapter); /* also enables intr */
RL_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
re_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* device-specific */
struct rl_softc *sc = ifp->if_softc;
struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); // XXX extra postwrite ?
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = sc->rl_ldata.rl_tx_prodidx;
// XXX or netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[nic_i];
int cmd = slot->len | RL_TDESC_CMD_EOF |
RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF;
NM_CHECK_ADDR_LEN(addr, len);
if (nic_i == lim) /* mark end of ring */
cmd |= RL_TDESC_CMD_EOR;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
txd[nic_i].tx_dmamap, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
desc->rl_cmdstat = htole32(cmd);
/* make sure changes to the buffer are synced */
bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
txd[nic_i].tx_dmamap,
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
sc->rl_ldata.rl_tx_prodidx = nic_i;
kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
/* start ? */
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
nic_i = sc->rl_ldata.rl_tx_considx;
for (n = 0; nic_i != sc->rl_ldata.rl_tx_prodidx;
n++, nic_i = RL_TX_DESC_NXT(sc, nic_i)) {
uint32_t cmdstat =
le32toh(sc->rl_ldata.rl_tx_list[nic_i].rl_cmdstat);
if (cmdstat & RL_TDESC_STAT_OWN)
break;
}
if (n > 0) {
sc->rl_ldata.rl_tx_considx = nic_i;
sc->rl_ldata.rl_tx_free += n;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
*/
static int
re_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct rl_softc *sc = ifp->if_softc;
struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
if (head > lim)
return netmap_ring_reinit(kring);
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
sc->rl_ldata.rl_rx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* First part: import newly received packets.
*
* This device uses all the buffers in the ring, so we need
* another termination condition in addition to RL_RDESC_STAT_OWN
* cleared (all buffers could have it cleared). The easiest one
* is to stop right before nm_hwcur.
*/
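/*
* Worked example (hypothetical sizes): with nkr_num_slots = 256
* (lim = 255) and nr_hwcur = 0, stop_i = nm_prev(0, 255) = 255,
* so at most 255 slots are scanned and one slot always stays unused,
* keeping a full ring distinguishable from an empty one.
*/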
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
uint32_t stop_i = nm_prev(kring->nr_hwcur, lim);
nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
nm_i = netmap_idx_n2k(kring, nic_i);
while (nm_i != stop_i) {
struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[nic_i];
uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
uint32_t total_len;
if ((rxstat & RL_RDESC_STAT_OWN) != 0)
break;
total_len = rxstat & sc->rl_rxlenmask;
/* XXX subtract crc */
total_len = (total_len < 4) ? 0 : total_len - 4;
ring->slot[nm_i].len = total_len;
ring->slot[nm_i].flags = slot_flags;
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD);
// sc->rl_ifp->if_ipackets++;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
sc->rl_ldata.rl_rx_prodidx = nic_i;
kring->nr_hwtail = nm_i;
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[nic_i];
int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN;
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (nic_i == lim) /* mark end of ring */
cmd |= RL_RDESC_CMD_EOR;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
rxd[nic_i].rx_dmamap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
desc->rl_cmdstat = htole32(cmd);
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
rxd[nic_i].rx_dmamap,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
sc->rl_ldata.rl_rx_list_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/*
* Additional routines to init the tx and rx rings.
* In other drivers we do that inline in the main code.
*/
static void
re_netmap_tx_init(struct rl_softc *sc)
{
struct rl_txdesc *txd;
struct rl_desc *desc;
int i, n;
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_slot *slot;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return;
}
slot = netmap_reset(na, NR_TX, 0, 0);
/* slot is NULL if we are not in netmap mode */
if (!slot)
return; // XXX cannot happen
/* in netmap mode, overwrite addresses and maps */
txd = sc->rl_ldata.rl_tx_desc;
desc = sc->rl_ldata.rl_tx_list;
n = sc->rl_ldata.rl_tx_desc_cnt;
/* l points in the netmap ring, i points in the NIC ring */
for (i = 0; i < n; i++) {
uint64_t paddr;
int l = netmap_idx_n2k(&na->tx_rings[0], i);
void *addr = PNMB(slot + l, &paddr);
desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_load_map(sc->rl_ldata.rl_tx_mtag,
txd[i].tx_dmamap, addr);
}
}
static void
re_netmap_rx_init(struct rl_softc *sc)
{
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
struct rl_desc *desc = sc->rl_ldata.rl_rx_list;
uint32_t cmdstat;
uint32_t nic_i, max_avail;
uint32_t const n = sc->rl_ldata.rl_rx_desc_cnt;
if (!slot)
return;
/*
* Do not release the slots owned by userspace,
* and also keep one empty.
*/
max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
for (nic_i = 0; nic_i < n; nic_i++) {
void *addr;
uint64_t paddr;
uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i);
addr = PNMB(slot + nm_i, &paddr);
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, addr);
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, BUS_DMASYNC_PREREAD);
desc[nic_i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc[nic_i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
cmdstat = NETMAP_BUF_SIZE;
if (nic_i == n - 1) /* mark the end of ring */
cmdstat |= RL_RDESC_CMD_EOR;
if (nic_i < max_avail)
cmdstat |= RL_RDESC_CMD_OWN;
desc[nic_i].rl_cmdstat = htole32(cmdstat);
}
}
static void
re_netmap_attach(struct rl_softc *sc)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = sc->rl_ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
na.num_tx_desc = sc->rl_ldata.rl_tx_desc_cnt;
na.num_rx_desc = sc->rl_ldata.rl_rx_desc_cnt;
na.nm_txsync = re_netmap_txsync;
na.nm_rxsync = re_netmap_rxsync;
na.nm_register = re_netmap_reg;
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */


@ -1,495 +0,0 @@
/*
* Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/dev/netmap/ixgbe_netmap.h 244514 2012-12-20 22:26:03Z luigi $
*
* netmap support for: ixgbe
*
* This file is meant to be a reference on how to implement
* netmap support for a network driver.
* This file contains code but only static or inline functions used
* by a single driver. To avoid replication of code we just #include
* it near the beginning of the standard driver.
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
/*
* Some drivers may need the following headers. Others
* already include them by default
#include <vm/vm.h>
#include <vm/pmap.h>
*/
#include <dev/netmap/netmap_kern.h>
/*
* device-specific sysctl variables:
*
* ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
* During regular operation the CRC is stripped, but on some
* hardware, receiving frames whose length is not a multiple of 64
* bytes is slower, so using crcstrip=0 helps in benchmarks.
*
* ix_rx_miss, ix_rx_miss_bufs:
* count packets that might be missed due to lost interrupts.
*/
SYSCTL_DECL(_dev_netmap);
static int ix_rx_miss, ix_rx_miss_bufs, ix_crcstrip;
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs");
static void
set_crcstrip(struct ixgbe_hw *hw, int onoff)
{
/* crc stripping is set in two places:
* IXGBE_HLREG0 (modified on init_locked and hw reset)
* IXGBE_RDRXCTL (set by the original driver in
* ixgbe_setup_hw_rsc() called in init_locked.
* We disable the setting when netmap is compiled in).
* We update the values here, but also in ixgbe.c because
* init_locked sometimes is called outside our control.
*/
uint32_t hl, rxc;
hl = IXGBE_READ_REG(hw, IXGBE_HLREG0);
rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
if (netmap_verbose)
D("%s read HLREG 0x%x rxc 0x%x",
onoff ? "enter" : "exit", hl, rxc);
/* hw requirements ... */
rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
rxc |= IXGBE_RDRXCTL_RSCACKC;
if (onoff && !ix_crcstrip) {
/* keep the crc. Fast rx */
hl &= ~IXGBE_HLREG0_RXCRCSTRP;
rxc &= ~IXGBE_RDRXCTL_CRCSTRIP;
} else {
/* reset default mode */
hl |= IXGBE_HLREG0_RXCRCSTRP;
rxc |= IXGBE_RDRXCTL_CRCSTRIP;
}
if (netmap_verbose)
D("%s write HLREG 0x%x rxc 0x%x",
onoff ? "enter" : "exit", hl, rxc);
IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl);
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
}
/*
* Register/unregister. We are already under netmap lock.
* Only called on the first register or the last unregister.
*/
static int
ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
IXGBE_CORE_LOCK(adapter);
ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ?
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
set_crcstrip(&adapter->hw, onoff);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
ixgbe_init_locked(adapter); /* also enables intr */
set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
IXGBE_CORE_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
*
* All information is in the kring.
* Userspace wants to send packets up to the one before kring->rhead,
* kernel knows kring->nr_hwcur is the first unsent packet.
*
* Here we push packets out (as many as possible), and possibly
* reclaim buffers from previously completed transmission.
*
* The caller (netmap) guarantees that there is only one instance
* running at any time. Any interference with other driver
* methods should be handled by the individual drivers.
*/
static int
ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/*
* interrupts on every tx packet are expensive so request
* them every half ring, or where NS_REPORT is set
*/
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
int reclaim_tx;
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* First part: process new packets to send.
* nm_i is the current index in the netmap ring,
* nic_i is the corresponding index in the NIC ring.
* The two numbers differ because upon a *_init() we reset
* the NIC ring but leave the netmap ring unchanged.
* For the transmit ring, we have
*
* nm_i = kring->nr_hwcur
* nic_i = IXGBE_TDT (not tracked in the driver)
* and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
* In this driver kring->nkr_hwofs >= 0, but for other
* drivers it might be negative as well.
*/
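/*
* Worked example (hypothetical values): with a 512-slot ring and
* nkr_hwofs = 3, NIC slot 509 corresponds to netmap slot
* (509 + 3) % 512 = 0.
*/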
/*
* If we have packets to send (kring->nr_hwcur != kring->rhead)
* iterate over the netmap ring, fetch length and update
* the corresponding slot in the NIC ring. Some drivers also
* need to update the buffer's physical address in the NIC slot
* even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
*
* The netmap_reload_map() call is especially expensive,
* even when (as in this case) the tag is 0, so do it only
* when the buffer has actually changed.
*
* If possible do not set the report/intr bit on all slots,
* but only a few times per ring or when NS_REPORT is set.
*
* Finally, on 10G and faster drivers, it might be useful
* to prefetch the next slot and txr entry.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
__builtin_prefetch(&ring->slot[nm_i]);
__builtin_prefetch(&txr->tx_buffers[nic_i]);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
/* device-specific */
union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
IXGBE_TXD_CMD_RS : 0;
/* prefetch for next round */
__builtin_prefetch(&ring->slot[nm_i + 1]);
__builtin_prefetch(&txr->tx_buffers[nic_i + 1]);
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
/* Use a legacy descriptor; they may be faster. */
curr->read.buffer_addr = htole64(paddr);
curr->read.olinfo_status = 0;
curr->read.cmd_type_len = htole32(len | flags |
IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
/* make sure changes to the buffer are synced */
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the tx unit up to slot nic_i (excluded) */
IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), nic_i);
}
/*
* Second part: reclaim buffers for completed transmissions.
* Because this is expensive (we read a NIC register etc.)
* we only do it in specific cases (see below).
*/
if (flags & NAF_FORCE_RECLAIM) {
reclaim_tx = 1; /* forced reclaim */
} else if (!nm_kr_txempty(kring)) {
reclaim_tx = 0; /* have buffers, no reclaim */
} else {
/*
* No buffers available. Locate previous slot with
* REPORT_STATUS set.
* If the slot has DD set, we can reclaim space,
* otherwise wait for the next interrupt.
* This enables interrupt moderation on the tx
* side though it might reduce throughput.
*/
struct ixgbe_legacy_tx_desc *txd =
(struct ixgbe_legacy_tx_desc *)txr->tx_base;
nic_i = txr->next_to_clean + report_frequency;
if (nic_i > lim)
nic_i -= lim + 1;
// round to the closest with dd set
nic_i = (nic_i < kring->nkr_num_slots / 4 ||
nic_i >= kring->nkr_num_slots*3/4) ?
0 : report_frequency;
reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ?
}
if (reclaim_tx) {
/*
* Record completed transmissions.
* We (re)use the driver's txr->next_to_clean to keep
* track of the most recently completed transmission.
*
* The datasheet discourages the use of TDH to find
* out the number of sent packets, but we only set
* REPORT_STATUS in a few slots so TDH is the only
* good way.
*/
nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
if (nic_i != txr->next_to_clean) {
/* some tx completed, increment avail */
txr->next_to_clean = nic_i;
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
}
nm_txsync_finalize(kring);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
* Same as for the txsync, this routine must be efficient.
* The caller guarantees a single invocation, but races against
* the rest of the driver should be handled here.
*
* On call, kring->rhead is the first packet that userspace wants
* to keep, and kring->rcur is the wakeup point.
* The kernel has previously reported packets up to kring->rtail.
*
* If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
* of whether or not we received an interrupt.
*/
static int
ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
if (head > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* First part: import newly received packets.
*
* nm_i is the index of the next free slot in the netmap ring,
* nic_i is the index of the next received packet in the NIC ring,
* and they may differ in case if_init() has been called while
* in netmap mode. For the receive ring we have
*
* nic_i = rxr->next_to_check;
* nm_i = kring->nr_hwtail (previous)
* and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
* rxr->next_to_check is set to 0 on a ring reinit
*/
if (netmap_no_pendintr || force_update) {
int crclen = ix_crcstrip ? 0 : 4;
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & IXGBE_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
if (netmap_no_pendintr && !force_update) {
/* diagnostics */
ix_rx_miss ++;
ix_rx_miss_bufs += n;
}
rxr->next_to_check = nic_i;
kring->nr_hwtail = nm_i;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Second part: skip past packets that userspace has released.
* (kring->nr_hwcur to kring->rhead excluded),
* and make the buffers available for reception.
* As usual nm_i is the index in the netmap ring,
* nic_i is the index in the NIC ring, and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->wb.upper.status_error = 0;
curr->read.pkt_addr = htole64(paddr);
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = head;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/*
* The attach routine, called near the end of ixgbe_attach(),
* fills the parameters for netmap_attach() and calls it.
* It cannot fail, in the worst case (such as no memory)
* netmap mode will be disabled and the driver will only
* operate in standard mode.
*/
static void
ixgbe_netmap_attach(struct adapter *adapter)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = adapter->ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
na.num_tx_desc = adapter->num_tx_desc;
na.num_rx_desc = adapter->num_rx_desc;
na.nm_txsync = ixgbe_netmap_txsync;
na.nm_rxsync = ixgbe_netmap_rxsync;
na.nm_register = ixgbe_netmap_reg;
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
netmap_attach(&na);
}
/* end of file */

File diff suppressed because it is too large


@ -1,657 +0,0 @@
/*
* Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* $FreeBSD$ */
#include <sys/types.h>
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/poll.h> /* POLLIN, POLLOUT */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/conf.h> /* DEV_MODULE */
#include <sys/endian.h>
#include <sys/rwlock.h>
#include <vm/vm.h> /* vtophys */
#include <vm/pmap.h> /* vtophys */
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <sys/malloc.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* */
#include <netinet/in.h> /* in6_cksum_pseudo() */
#include <machine/in_cksum.h> /* in_pseudo(), in_cksum_hdr() */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
{
/* TODO XXX please use the FreeBSD implementation for this. */
uint16_t *words = (uint16_t *)data;
int nw = len / 2;
int i;
for (i = 0; i < nw; i++)
cur_sum += be16toh(words[i]);
if (len & 1)
cur_sum += (data[len-1] << 8);
return cur_sum;
}
/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
* return value is in network byte order.
*/
uint16_t nm_csum_fold(rawsum_t cur_sum)
{
/* TODO XXX please use the FreeBSD implementation for this. */
while (cur_sum >> 16)
cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16);
return htobe16((~cur_sum) & 0xFFFF);
}
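/*
* Usage sketch (payload pointer and length are hypothetical): a
* wire-format checksum is obtained by folding the raw sum:
*
*	rawsum_t sum = nm_csum_raw(payload, payload_len, 0);
*	uint16_t csum = nm_csum_fold(sum);	// network byte order
*/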
uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
{
#if 0
return in_cksum_hdr((void *)iph);
#else
return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
#endif
}
void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
size_t datalen, uint16_t *check)
{
uint16_t pseudolen = datalen + iph->protocol;
/* Compute and insert the pseudo-header checksum. */
*check = in_pseudo(iph->saddr, iph->daddr,
htobe16(pseudolen));
/* Compute the checksum on TCP/UDP header + payload
* (includes the pseudo-header).
*/
*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
}
void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
size_t datalen, uint16_t *check)
{
#ifdef INET6
*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
#else
static int notsupported = 0;
if (!notsupported) {
notsupported = 1;
D("inet6 segmentation not supported");
}
#endif
}
/*
* Intercept the rx routine in the standard device driver.
* Second argument is non-zero to intercept, 0 to restore
*/
int
netmap_catch_rx(struct netmap_adapter *na, int intercept)
{
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
struct ifnet *ifp = na->ifp;
if (intercept) {
if (gna->save_if_input) {
D("cannot intercept again");
return EINVAL; /* already set */
}
gna->save_if_input = ifp->if_input;
ifp->if_input = generic_rx_handler;
} else {
if (!gna->save_if_input){
D("cannot restore");
return EINVAL; /* not saved */
}
ifp->if_input = gna->save_if_input;
gna->save_if_input = NULL;
}
return 0;
}
/*
* Intercept the packet steering routine in the tx path,
* so that we can decide which queue is used for an mbuf.
* Second argument is non-zero to intercept, 0 to restore.
* On freebsd we just intercept if_transmit.
*/
void
netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
{
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = na->ifp;
if (enable) {
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
} else {
ifp->if_transmit = na->if_transmit;
}
}
/*
* Transmit routine used by generic_netmap_txsync(). Returns 0 on success
* and non-zero on error (which may be packet drops or other errors).
* addr and len identify the netmap buffer, m is the (preallocated)
* mbuf to use for transmissions.
*
* We should add a reference to the mbuf so the m_freem() at the end
* of the transmission does not consume resources.
*
* On FreeBSD, and on multiqueue cards, we can force the queue using
* if ((m->m_flags & M_FLOWID) != 0)
* i = m->m_pkthdr.flowid % adapter->num_queues;
* else
* i = curcpu % adapter->num_queues;
*
*/
int
generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
void *addr, u_int len, u_int ring_nr)
{
int ret;
m->m_len = m->m_pkthdr.len = 0;
// copy data to the mbuf
m_copyback(m, 0, len, addr);
// inc refcount. We are alone, so we can skip the atomic
atomic_fetchadd_int(m->m_ext.ref_cnt, 1);
m->m_flags |= M_FLOWID;
m->m_pkthdr.flowid = ring_nr;
m->m_pkthdr.rcvif = ifp; /* used for tx notification */
ret = NA(ifp)->if_transmit(ifp, m);
return ret;
}
/*
* The following two functions are empty until we have a generic
* way to extract the info from the ifp
*/
int
generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
D("called");
return 0;
}
void
generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
{
D("called");
*txq = netmap_generic_rings;
*rxq = netmap_generic_rings;
}
void netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
struct netmap_adapter *na)
{
ND("called");
mit->mit_pending = 0;
mit->mit_ring_idx = idx;
mit->mit_na = na;
}
void netmap_mitigation_start(struct nm_generic_mit *mit)
{
ND("called");
}
void netmap_mitigation_restart(struct nm_generic_mit *mit)
{
ND("called");
}
int netmap_mitigation_active(struct nm_generic_mit *mit)
{
ND("called");
return 0;
}
void netmap_mitigation_cleanup(struct nm_generic_mit *mit)
{
ND("called");
}
/*
* In order to track whether pages are still mapped, we hook into
* the standard cdev_pager and intercept the constructor and
* destructor.
*/
struct netmap_vm_handle_t {
struct cdev *dev;
struct netmap_priv_d *priv;
};
static int
netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
struct netmap_vm_handle_t *vmh = handle;
if (netmap_verbose)
D("handle %p size %jd prot %d foff %jd",
handle, (intmax_t)size, prot, (intmax_t)foff);
dev_ref(vmh->dev);
return 0;
}
static void
netmap_dev_pager_dtor(void *handle)
{
struct netmap_vm_handle_t *vmh = handle;
struct cdev *dev = vmh->dev;
struct netmap_priv_d *priv = vmh->priv;
if (netmap_verbose)
D("handle %p", handle);
netmap_dtor(priv);
free(vmh, M_DEVBUF);
dev_rel(dev);
}
static int
netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
int prot, vm_page_t *mres)
{
struct netmap_vm_handle_t *vmh = object->handle;
struct netmap_priv_d *priv = vmh->priv;
vm_paddr_t paddr;
vm_page_t page;
vm_memattr_t memattr;
vm_pindex_t pidx;
ND("object %p offset %jd prot %d mres %p",
object, (intmax_t)offset, prot, mres);
memattr = object->memattr;
pidx = OFF_TO_IDX(offset);
paddr = netmap_mem_ofstophys(priv->np_mref, offset);
if (paddr == 0)
return VM_PAGER_FAIL;
if (((*mres)->flags & PG_FICTITIOUS) != 0) {
/*
* If the passed in result page is a fake page, update it with
* the new physical address.
*/
page = *mres;
vm_page_updatefake(page, paddr, memattr);
} else {
/*
* Replace the passed-in page with our own fake page and
* free up all of the original pages.
*/
#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */
#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
#define VM_OBJECT_WLOCK VM_OBJECT_LOCK
#endif /* VM_OBJECT_WUNLOCK */
VM_OBJECT_WUNLOCK(object);
page = vm_page_getfake(paddr, memattr);
VM_OBJECT_WLOCK(object);
vm_page_lock(*mres);
vm_page_free(*mres);
vm_page_unlock(*mres);
*mres = page;
vm_page_insert(page, object, pidx);
}
page->valid = VM_PAGE_BITS_ALL;
return (VM_PAGER_OK);
}
static struct cdev_pager_ops netmap_cdev_pager_ops = {
.cdev_pg_ctor = netmap_dev_pager_ctor,
.cdev_pg_dtor = netmap_dev_pager_dtor,
.cdev_pg_fault = netmap_dev_pager_fault,
};
static int
netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
vm_size_t objsize, vm_object_t *objp, int prot)
{
int error;
struct netmap_vm_handle_t *vmh;
struct netmap_priv_d *priv;
vm_object_t obj;
if (netmap_verbose)
D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
(intmax_t )*foff, (intmax_t )objsize, objp, prot);
vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (vmh == NULL)
return ENOMEM;
vmh->dev = cdev;
NMG_LOCK();
error = devfs_get_cdevpriv((void**)&priv);
if (error)
goto err_unlock;
vmh->priv = priv;
priv->np_refcount++;
NMG_UNLOCK();
error = netmap_get_memory(priv);
if (error)
goto err_deref;
obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
&netmap_cdev_pager_ops, objsize, prot,
*foff, NULL);
if (obj == NULL) {
D("cdev_pager_allocate failed");
error = EINVAL;
goto err_deref;
}
*objp = obj;
return 0;
err_deref:
NMG_LOCK();
priv->np_refcount--;
err_unlock:
NMG_UNLOCK();
// err:
free(vmh, M_DEVBUF);
return error;
}
// XXX can we remove this ?
static int
netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
if (netmap_verbose)
D("dev %p fflag 0x%x devtype %d td %p",
dev, fflag, devtype, td);
return 0;
}
static int
netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
struct netmap_priv_d *priv;
int error;
(void)dev;
(void)oflags;
(void)devtype;
(void)td;
// XXX wait or nowait ?
priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (priv == NULL)
return ENOMEM;
error = devfs_set_cdevpriv(priv, netmap_dtor);
if (error)
return error;
priv->np_refcount = 1;
return 0;
}
/******************** kqueue support ****************/
/*
* The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
* We use a non-zero argument to distinguish the call from the one
* in kqueue_scan(), which instead also needs to run netmap_poll().
* The knote uses a global mutex for the time being. We might
* try to reuse the one in the si, but it is not allocated
* permanently so it might be a bit tricky.
*
* The *kqfilter function registers one or another f_event
* depending on read or write mode.
* In the call to f_event() td_fpop is NULL so any child function
* calling devfs_get_cdevpriv() would fail - and we need it in
* netmap_poll(). As a workaround we store priv into kn->kn_hook
* and pass it as first argument to netmap_poll(), which then
* uses the failure to tell that we are called from f_event()
* and do not need the selrecord().
*/
void freebsd_selwakeup(struct selinfo *si, int pri);
void
freebsd_selwakeup(struct selinfo *si, int pri)
{
if (netmap_verbose)
D("on knote %p", &si->si_note);
selwakeuppri(si, pri);
/* use a non-zero hint to tell the notification from the
* call done in kqueue_scan() which uses 0
*/
KNOTE_UNLOCKED(&si->si_note, 0x100 /* notification */);
}
static void
netmap_knrdetach(struct knote *kn)
{
struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
struct selinfo *si = priv->np_rxsi;
D("remove selinfo %p", si);
knlist_remove(&si->si_note, kn, 0);
}
static void
netmap_knwdetach(struct knote *kn)
{
struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
struct selinfo *si = priv->np_txsi;
D("remove selinfo %p", si);
knlist_remove(&si->si_note, kn, 0);
}
/*
* callback from notifies (generated externally) and our
* calls to kevent(). The former we just return 1 (ready)
* since we do not know better.
* In the latter we call netmap_poll and return 0/1 accordingly.
*/
static int
netmap_knrw(struct knote *kn, long hint, int events)
{
struct netmap_priv_d *priv;
int revents;
if (hint != 0) {
ND(5, "call from notify");
return 1; /* assume we are ready */
}
priv = kn->kn_hook;
/* the notification may come from an external thread,
* in which case we do not want to run netmap_poll.
* This should be filtered above, but check just in case.
*/
if (curthread != priv->np_td) { /* should not happen */
RD(5, "curthread changed %p %p", curthread, priv->np_td);
return 1;
} else {
revents = netmap_poll((void *)priv, events, curthread);
return (events & revents) ? 1 : 0;
}
}
static int
netmap_knread(struct knote *kn, long hint)
{
return netmap_knrw(kn, hint, POLLIN);
}
static int
netmap_knwrite(struct knote *kn, long hint)
{
return netmap_knrw(kn, hint, POLLOUT);
}
static struct filterops netmap_rfiltops = {
.f_isfd = 1,
.f_detach = netmap_knrdetach,
.f_event = netmap_knread,
};
static struct filterops netmap_wfiltops = {
.f_isfd = 1,
.f_detach = netmap_knwdetach,
.f_event = netmap_knwrite,
};
/*
* This is called when a thread invokes kevent() to record
* a change in the configuration of the kqueue().
* The 'priv' should be the same as in the netmap device.
*/
static int
netmap_kqfilter(struct cdev *dev, struct knote *kn)
{
struct netmap_priv_d *priv;
int error;
struct netmap_adapter *na;
struct selinfo *si;
int ev = kn->kn_filter;
if (ev != EVFILT_READ && ev != EVFILT_WRITE) {
D("bad filter request %d", ev);
return 1;
}
error = devfs_get_cdevpriv((void**)&priv);
if (error) {
D("device not yet setup");
return 1;
}
na = priv->np_na;
if (na == NULL) {
D("no netmap adapter for this file descriptor");
return 1;
}
/* the si is indicated in the priv */
si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
// XXX lock(priv) ?
kn->kn_fop = (ev == EVFILT_WRITE) ?
&netmap_wfiltops : &netmap_rfiltops;
kn->kn_hook = priv;
knlist_add(&si->si_note, kn, 1);
// XXX unlock(priv)
ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s",
na, na->ifp->if_xname, curthread, priv, kn,
priv->np_nifp,
kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH");
return 0;
}
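/*
 * Illustrative userspace sketch (an assumption for the example:
 * 'fd' is an open /dev/netmap descriptor already bound with
 * NIOCREGIF). This is not part of the module, it only shows how
 * the filter above gets exercised:
 *
 *	struct kevent ev;
 *	int kq = kqueue();
 *
 *	EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);  // registers via netmap_kqfilter()
 *	kevent(kq, NULL, 0, &ev, 1, NULL);  // wait; f_event runs netmap_poll()
 */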
struct cdevsw netmap_cdevsw = {
.d_version = D_VERSION,
.d_name = "netmap",
.d_open = netmap_open,
.d_mmap_single = netmap_mmap_single,
.d_ioctl = netmap_ioctl,
.d_poll = netmap_poll,
.d_kqfilter = netmap_kqfilter,
.d_close = netmap_close,
};
/*--- end of kqueue support ----*/
/*
* Kernel entry point.
*
* Initialize/finalize the module and return.
*
* Return 0 on success, errno on failure.
*/
static int
netmap_loader(__unused struct module *module, int event, __unused void *arg)
{
int error = 0;
switch (event) {
case MOD_LOAD:
error = netmap_init();
break;
case MOD_UNLOAD:
netmap_fini();
break;
default:
error = EOPNOTSUPP;
break;
}
return (error);
}
DEV_MODULE(netmap, netmap_loader, NULL);
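/*
 * With the module built as netmap.ko, the loader above is invoked
 * through the usual kld machinery, e.g. (illustrative):
 *
 *	# kldload ./netmap.ko
 *	# kldunload netmap
 */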


@@ -1,818 +0,0 @@
/*
* Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This module implements netmap support on top of standard,
* unmodified device drivers.
*
* A NIOCREGIF request is handled here if the device does not
* have native support. TX and RX rings are emulated as follows:
*
* NIOCREGIF
* We preallocate a block of TX mbufs (roughly as many as
* tx descriptors; the number is not critical) to speed up
* operation during transmissions. The refcount on most of
* these buffers is artificially bumped up so we can recycle
* them more easily. Also, the destructor is intercepted
* so we use it as an interrupt notification to wake up
* processes blocked on a poll().
*
* For each receive ring we allocate one "struct mbq"
* (an mbuf tailq plus a spinlock). We intercept packets
* (through if_input)
* on the receive path and put them in the mbq from which
* netmap receive routines can grab them.
*
* TX:
* in the generic_txsync() routine, netmap buffers are copied
* (or linked, in the future) to the preallocated mbufs
* and pushed to the transmit queue. Some of these mbufs
* (those with NS_REPORT, or otherwise every half ring)
* have the refcount=1, others have refcount=2.
* When the destructor is invoked, we take that as
* a notification that all mbufs up to that one in
* the specific ring have been completed, and generate
* the equivalent of a transmit interrupt.
*
* RX:
* mbufs stolen from the host stack by the rx handler are
* queued on the per-ring mbq; generic_netmap_rxsync() then
* copies their payload into the netmap slots and frees them.
*
*/
#ifdef __FreeBSD__
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257666 2013-11-05 01:06:22Z luigi $");
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h> /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */
// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
#define rtnl_lock() D("rtnl_lock called");
#define rtnl_unlock() D("rtnl_unlock called");
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
/*
* mbuf wrappers
*/
/*
* we allocate an EXT_PACKET
*/
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
/* mbuf destructor, also need to change the type to EXT_EXTREF,
* add an M_NOFREE flag, and then clear the flag and
* chain into uma_zfree(zone_pack, mf)
* (or reinstall the buffer ?)
*/
#define SET_MBUF_DESTRUCTOR(m, fn) do { \
(m)->m_ext.ext_free = (void *)fn; \
(m)->m_ext.ext_type = EXT_EXTREF; \
} while (0)
#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
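/*
 * Illustrative sequence (see generic_set_tx_event() below): the tx
 * path installs the destructor on a pool mbuf and drops its own
 * reference; when the driver frees the last reference the destructor
 * fires and acts as a tx-completion notification:
 *
 *	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
 *	m_freem(m);
 */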
#else /* linux */
#include "bsd_glue.h"
#include <linux/rtnetlink.h> /* rtnl_[un]lock() */
#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>
//#define REG_RESET
#endif /* linux */
/* Common headers. */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
/* ======================== usage stats =========================== */
#ifdef RATE_GENERIC
#define IFRATE(x) x
struct rate_stats {
unsigned long txpkt;
unsigned long txsync;
unsigned long txirq;
unsigned long rxpkt;
unsigned long rxirq;
unsigned long rxsync;
};
struct rate_context {
unsigned refcount;
struct timer_list timer;
struct rate_stats new;
struct rate_stats old;
};
#define RATE_PRINTK(_NAME_) \
printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD 2
static void rate_callback(unsigned long arg)
{
struct rate_context * ctx = (struct rate_context *)arg;
struct rate_stats cur = ctx->new;
int r;
RATE_PRINTK(txpkt);
RATE_PRINTK(txsync);
RATE_PRINTK(txirq);
RATE_PRINTK(rxpkt);
RATE_PRINTK(rxsync);
RATE_PRINTK(rxirq);
printk("\n");
ctx->old = cur;
r = mod_timer(&ctx->timer, jiffies +
msecs_to_jiffies(RATE_PERIOD * 1000));
if (unlikely(r))
D("[v1000] Error: mod_timer()");
}
static struct rate_context rate_ctx;
void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
{
if (txp) rate_ctx.new.txpkt++;
if (txs) rate_ctx.new.txsync++;
if (txi) rate_ctx.new.txirq++;
if (rxp) rate_ctx.new.rxpkt++;
if (rxs) rate_ctx.new.rxsync++;
if (rxi) rate_ctx.new.rxirq++;
}
#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */
/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
#define GENERIC_BUF_SIZE netmap_buf_size /* Size of the mbufs in the Tx pool. */
/*
* Wrapper used by the generic adapter layer to notify
* the poller threads. Differently from netmap_rx_irq(), we check
* only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
*/
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
return;
netmap_common_irq(ifp, q, work_done);
}
/* Enable/disable netmap mode for a generic network interface. */
static int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
struct ifnet *ifp = na->ifp;
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
struct mbuf *m;
int error;
int i, r;
if (!na)
return EINVAL;
#ifdef REG_RESET
error = ifp->netdev_ops->ndo_stop(ifp);
if (error) {
return error;
}
#endif /* REG_RESET */
if (enable) { /* Enable netmap mode. */
/* Init the mitigation support on all the rx queues. */
gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
M_DEVBUF, M_NOWAIT | M_ZERO);
if (!gna->mit) {
D("mitigation allocation failed");
error = ENOMEM;
goto out;
}
for (r=0; r<na->num_rx_rings; r++)
netmap_mitigation_init(&gna->mit[r], r, na);
/* Initialize the rx queue, as generic_rx_handler() can
* be called as soon as netmap_catch_rx() returns.
*/
for (r=0; r<na->num_rx_rings; r++) {
mbq_safe_init(&na->rx_rings[r].rx_queue);
}
/*
* Preallocate packet buffers for the tx rings.
*/
for (r=0; r<na->num_tx_rings; r++)
na->tx_rings[r].tx_pool = NULL;
for (r=0; r<na->num_tx_rings; r++) {
na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
M_DEVBUF, M_NOWAIT | M_ZERO);
if (!na->tx_rings[r].tx_pool) {
D("tx_pool allocation failed");
error = ENOMEM;
goto free_tx_pools;
}
for (i=0; i<na->num_tx_desc; i++)
na->tx_rings[r].tx_pool[i] = NULL;
for (i=0; i<na->num_tx_desc; i++) {
m = netmap_get_mbuf(GENERIC_BUF_SIZE);
if (!m) {
D("tx_pool[%d] allocation failed", i);
error = ENOMEM;
goto free_tx_pools;
}
na->tx_rings[r].tx_pool[i] = m;
}
}
rtnl_lock();
/* Prepare to intercept incoming traffic. */
error = netmap_catch_rx(na, 1);
if (error) {
D("netdev_rx_handler_register() failed (%d)", error);
goto register_handler;
}
ifp->if_capenable |= IFCAP_NETMAP;
/* Make netmap control the packet steering. */
netmap_catch_tx(gna, 1);
rtnl_unlock();
#ifdef RATE_GENERIC
if (rate_ctx.refcount == 0) {
D("setup_timer()");
memset(&rate_ctx, 0, sizeof(rate_ctx));
setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
D("Error: mod_timer()");
}
}
rate_ctx.refcount++;
#endif /* RATE */
} else if (na->tx_rings[0].tx_pool) {
/* Disable netmap mode. We enter here only if the previous
generic_netmap_register(na, 1) was successful.
If it was not, na->tx_rings[0].tx_pool was set to NULL by the
error handling code below. */
rtnl_lock();
ifp->if_capenable &= ~IFCAP_NETMAP;
/* Release packet steering control. */
netmap_catch_tx(gna, 0);
/* Do not intercept packets on the rx path. */
netmap_catch_rx(na, 0);
rtnl_unlock();
/* Free the mbufs going to the netmap rings */
for (r=0; r<na->num_rx_rings; r++) {
mbq_safe_purge(&na->rx_rings[r].rx_queue);
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
}
for (r=0; r<na->num_rx_rings; r++)
netmap_mitigation_cleanup(&gna->mit[r]);
free(gna->mit, M_DEVBUF);
for (r=0; r<na->num_tx_rings; r++) {
for (i=0; i<na->num_tx_desc; i++) {
m_freem(na->tx_rings[r].tx_pool[i]);
}
free(na->tx_rings[r].tx_pool, M_DEVBUF);
}
#ifdef RATE_GENERIC
if (--rate_ctx.refcount == 0) {
D("del_timer()");
del_timer(&rate_ctx.timer);
}
#endif
}
#ifdef REG_RESET
error = ifp->netdev_ops->ndo_open(ifp);
if (error) {
goto free_tx_pools;
}
#endif
return 0;
register_handler:
rtnl_unlock();
free_tx_pools:
for (r=0; r<na->num_tx_rings; r++) {
if (na->tx_rings[r].tx_pool == NULL)
continue;
for (i=0; i<na->num_tx_desc; i++)
if (na->tx_rings[r].tx_pool[i])
m_freem(na->tx_rings[r].tx_pool[i]);
free(na->tx_rings[r].tx_pool, M_DEVBUF);
na->tx_rings[r].tx_pool = NULL;
}
for (r=0; r<na->num_rx_rings; r++) {
netmap_mitigation_cleanup(&gna->mit[r]);
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
}
free(gna->mit, M_DEVBUF);
out:
return error;
}
/*
* Callback invoked when the device driver frees an mbuf used
* by netmap to transmit a packet. This usually happens when
* the NIC notifies the driver that transmission is completed.
*/
static void
generic_mbuf_destructor(struct mbuf *m)
{
if (netmap_verbose)
D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
m->m_ext.ext_type = EXT_PACKET;
m->m_ext.ext_free = NULL;
if (*(m->m_ext.ref_cnt) == 0)
*(m->m_ext.ref_cnt) = 1;
uma_zfree(zone_pack, m);
#endif /* __FreeBSD__ */
IFRATE(rate_ctx.new.txirq++);
}
/* Record completed transmissions and update hwtail.
*
* The oldest tx buffer not yet completed is at nr_hwtail + 1,
* nr_hwcur is the first unsent buffer.
*/
static u_int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
u_int const lim = kring->nkr_num_slots - 1;
u_int nm_i = nm_next(kring->nr_hwtail, lim);
u_int hwcur = kring->nr_hwcur;
u_int n = 0;
struct mbuf **tx_pool = kring->tx_pool;
while (nm_i != hwcur) { /* buffers not completed */
struct mbuf *m = tx_pool[nm_i];
if (unlikely(m == NULL)) {
/* this is done, try to replenish the entry */
tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
if (unlikely(m == NULL)) {
D("mbuf allocation failed, XXX error");
// XXX how do we proceed ? break ?
return -ENOMEM;
}
} else if (GET_MBUF_REFCNT(m) != 1) {
break; /* This mbuf is still busy: its refcnt is 2. */
}
n++;
nm_i = nm_next(nm_i, lim);
}
kring->nr_hwtail = nm_prev(nm_i, lim);
ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
return n;
}
/*
* We have pending packets in the driver between nr_hwtail + 1 and hwcur.
* Compute a position in the middle, to be used to generate
* a notification.
*/
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
u_int n = kring->nkr_num_slots;
u_int ntc = nm_next(kring->nr_hwtail, n-1);
u_int e;
if (hwcur >= ntc) {
e = (hwcur + ntc) / 2;
} else { /* wrap around */
e = (hwcur + n + ntc) / 2;
if (e >= n) {
e -= n;
}
}
if (unlikely(e >= n)) {
D("This cannot happen");
e = 0;
}
return e;
}
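/*
 * Worked example (illustrative): with nkr_num_slots = 256,
 * nr_hwtail = 10 and hwcur = 200 we get ntc = 11 and
 * e = (200 + 11) / 2 = 105, roughly halfway into the pending
 * region. With hwcur = 5 (wrapped around) we get
 * e = (5 + 256 + 11) / 2 = 136, which again falls inside the
 * pending region 11..255, 0..4.
 */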
/*
* We have pending packets in the driver between nr_hwtail+1 and hwcur.
* Schedule a notification approximately in the middle of the two.
* There is a race but this is only called within txsync which does
* a double check.
*/
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
struct mbuf *m;
u_int e;
if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
return; /* all buffers are free */
}
e = generic_tx_event_middle(kring, hwcur);
m = kring->tx_pool[e];
if (m == NULL) {
/* This can happen if there is already an event on the netmap
slot 'e': There is nothing to do. */
return;
}
ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
kring->tx_pool[e] = NULL;
SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
// XXX wmb() ?
/* Decrement the refcount and free it if we have the last one. */
m_freem(m);
smp_mb();
}
/*
* generic_netmap_txsync() transforms netmap buffers into mbufs
* and passes them to the standard device driver
* (ndo_start_xmit() or ifp->if_transmit() ).
* On linux this is not done directly, but using dev_queue_xmit(),
* since it implements the TX flow control (and takes some locks).
*/
static int
generic_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */ // j
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
u_int ring_nr = kring->ring_id;
IFRATE(rate_ctx.new.txsync++);
// TODO: handle the case of mbuf allocation failure
rmb();
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
while (nm_i != head) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
void *addr = NMB(slot);
/* device-specific */
struct mbuf *m;
int tx_ret;
NM_CHECK_ADDR_LEN(addr, len);
/* Take an mbuf from the tx pool and copy in the user packet. */
m = kring->tx_pool[nm_i];
if (unlikely(!m)) {
RD(5, "This should never happen");
kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
if (unlikely(m == NULL)) {
D("mbuf allocation failed");
break;
}
}
/* XXX we should ask notifications when NS_REPORT is set,
* or roughly every half frame. We can optimize this
* by lazily requesting notifications only when a
* transmission fails. Probably the best way is to
* break on failures and set notifications when
* ring->cur == ring->tail || nm_i != cur
*/
tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
if (unlikely(tx_ret)) {
RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
tx_ret, nm_i, head, kring->nr_hwtail);
/*
* No room for this mbuf in the device driver.
* Request a notification FOR A PREVIOUS MBUF,
* then call generic_netmap_tx_clean(kring) to do the
* double check and see if we can free more buffers.
* If there is space continue, else break;
* NOTE: the double check is necessary if the problem
* occurs in the txsync call after selrecord().
* Also, we need some way to tell the caller that not
* all buffers were queued onto the device (this was
* not a problem with native netmap driver where space
* is preallocated). The bridge has a similar problem
* and we solve it there by dropping the excess packets.
*/
generic_set_tx_event(kring, nm_i);
if (generic_netmap_tx_clean(kring)) { /* space now available */
continue;
} else {
break;
}
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
nm_i = nm_next(nm_i, lim);
IFRATE(rate_ctx.new.txpkt ++);
}
/* Update hwcur to the next slot to transmit. */
kring->nr_hwcur = nm_i; /* not head, we could break early */
}
/*
* Second, reclaim completed buffers
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* No more available slots? Set a notification event
* on a netmap slot that will be cleaned in the future.
* No doublecheck is performed, since txsync() will be
* called twice by netmap_poll().
*/
generic_set_tx_event(kring, nm_i);
}
ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);
generic_netmap_tx_clean(kring);
nm_txsync_finalize(kring);
return 0;
}
/*
* This handler is registered (through netmap_catch_rx())
* within the attached network interface
* in the RX subsystem, so that every mbuf passed up by
* the driver can be stolen to the network stack.
* Stolen packets are put in a queue where the
* generic_netmap_rxsync() callback can extract them.
*/
void
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
struct netmap_adapter *na = NA(ifp);
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
u_int work_done;
u_int rr = MBUF_RXQ(m); // receive ring number
if (rr >= na->num_rx_rings) {
rr = rr % na->num_rx_rings; // XXX expensive...
}
/* limit the size of the queue */
if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
m_freem(m);
} else {
mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
}
if (netmap_generic_mit < 32768) {
/* no rx mitigation, pass notification up */
netmap_generic_irq(na->ifp, rr, &work_done);
IFRATE(rate_ctx.new.rxirq++);
} else {
/* same as send combining, filter notification if there is a
* pending timer, otherwise pass it up and start a timer.
*/
if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
/* Record that there is some pending work. */
gna->mit[rr].mit_pending = 1;
} else {
netmap_generic_irq(na->ifp, rr, &work_done);
IFRATE(rate_ctx.new.rxirq++);
netmap_mitigation_start(&gna->mit[rr]);
}
}
}
/*
* generic_netmap_rxsync() extracts mbufs from the queue filled by
* generic_rx_handler() and puts their content in the netmap
* receive ring.
* Access must be protected because the rx handler is asynchronous.
*/
static int
generic_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */ //j,
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
if (head > lim)
return netmap_ring_reinit(kring);
/*
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
/* extract buffers from the rx queue, stop at most one
* slot before nr_hwcur (stop_i)
*/
uint16_t slot_flags = kring->nkr_slot_flags;
u_int stop_i = nm_prev(kring->nr_hwcur, lim);
nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
for (n = 0; nm_i != stop_i; n++) {
int len;
void *addr = NMB(&ring->slot[nm_i]);
struct mbuf *m;
/* we only check the address here on generic rx rings */
if (addr == netmap_buffer_base) { /* Bad buffer */
return netmap_ring_reinit(kring);
}
/*
* Call the locked version of the function.
* XXX Ideally we could grab a batch of mbufs at once
* and save some locking overhead.
*/
m = mbq_safe_dequeue(&kring->rx_queue);
if (!m) /* no more data */
break;
len = MBUF_LEN(m);
m_copydata(m, 0, len, addr);
ring->slot[nm_i].len = len;
ring->slot[nm_i].flags = slot_flags;
m_freem(m);
nm_i = nm_next(nm_i, lim);
}
if (n) {
kring->nr_hwtail = nm_i;
IFRATE(rate_ctx.new.rxpkt += n);
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
// XXX should we invert the order ?
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
/* Userspace has released some packets. */
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
slot->flags &= ~NS_BUF_CHANGED;
nm_i = nm_next(nm_i, lim);
}
kring->nr_hwcur = head;
}
/* tell userspace that there might be new packets. */
nm_rxsync_finalize(kring);
IFRATE(rate_ctx.new.rxsync++);
return 0;
}
static void
generic_netmap_dtor(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
struct netmap_adapter *prev_na = gna->prev;
if (prev_na != NULL) {
D("Released generic NA %p", gna);
if_rele(na->ifp);
netmap_adapter_put(prev_na);
}
if (ifp != NULL) {
WNA(ifp) = prev_na;
D("Restored native NA %p", prev_na);
na->ifp = NULL;
}
}
/*
* generic_netmap_attach() makes it possible to use netmap on
* a device without native netmap support.
* This is less performant than native support but potentially
* faster than raw sockets or similar schemes.
*
* In this "emulated" mode, netmap rings do not necessarily
* have the same size as those in the NIC. We use a default
* value and possibly override it if the OS has ways to fetch the
* actual configuration.
*/
int
generic_netmap_attach(struct ifnet *ifp)
{
struct netmap_adapter *na;
struct netmap_generic_adapter *gna;
int retval;
u_int num_tx_desc, num_rx_desc;
num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
if (num_tx_desc == 0 || num_rx_desc == 0) {
D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
return EINVAL;
}
gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (gna == NULL) {
D("no memory on attach, give up");
return ENOMEM;
}
na = (struct netmap_adapter *)gna;
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;
na->nm_register = &generic_netmap_register;
na->nm_txsync = &generic_netmap_txsync;
na->nm_rxsync = &generic_netmap_rxsync;
na->nm_dtor = &generic_netmap_dtor;
/* when using generic, IFCAP_NETMAP is set so we force
* NAF_SKIP_INTR to use the regular interrupt handler
*/
na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;
ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
ifp->num_tx_queues, ifp->real_num_tx_queues,
ifp->tx_queue_len);
ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
ifp->num_rx_queues, ifp->real_num_rx_queues);
generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
retval = netmap_attach_common(na);
if (retval) {
free(gna, M_DEVBUF);
}
return retval;
}

File diff suppressed because it is too large


@@ -1,163 +0,0 @@
/*
* Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
*/
#ifdef linux
#include "bsd_glue.h"
#else /* __FreeBSD__ */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#endif /* __FreeBSD__ */
#include "netmap_mbq.h"
static inline void __mbq_init(struct mbq *q)
{
q->head = q->tail = NULL;
q->count = 0;
}
void mbq_safe_init(struct mbq *q)
{
mtx_init(&q->lock, "mbq", NULL, MTX_SPIN);
__mbq_init(q);
}
void mbq_init(struct mbq *q)
{
__mbq_init(q);
}
static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m)
{
m->m_nextpkt = NULL;
if (q->tail) {
q->tail->m_nextpkt = m;
q->tail = m;
} else {
q->head = q->tail = m;
}
q->count++;
}
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
{
mtx_lock(&q->lock);
__mbq_enqueue(q, m);
mtx_unlock(&q->lock);
}
void mbq_enqueue(struct mbq *q, struct mbuf *m)
{
__mbq_enqueue(q, m);
}
static inline struct mbuf *__mbq_dequeue(struct mbq *q)
{
struct mbuf *ret = NULL;
if (q->head) {
ret = q->head;
q->head = ret->m_nextpkt;
if (q->head == NULL) {
q->tail = NULL;
}
q->count--;
ret->m_nextpkt = NULL;
}
return ret;
}
struct mbuf *mbq_safe_dequeue(struct mbq *q)
{
struct mbuf *ret;
mtx_lock(&q->lock);
ret = __mbq_dequeue(q);
mtx_unlock(&q->lock);
return ret;
}
struct mbuf *mbq_dequeue(struct mbq *q)
{
return __mbq_dequeue(q);
}
/* XXX seems pointless to have a generic purge */
static void __mbq_purge(struct mbq *q, int safe)
{
struct mbuf *m;
for (;;) {
m = safe ? mbq_safe_dequeue(q) : mbq_dequeue(q);
if (m) {
m_freem(m);
} else {
break;
}
}
}
void mbq_purge(struct mbq *q)
{
__mbq_purge(q, 0);
}
void mbq_safe_purge(struct mbq *q)
{
__mbq_purge(q, 1);
}
void mbq_safe_destroy(struct mbq *q)
{
mtx_destroy(&q->lock);
}
void mbq_destroy(struct mbq *q)
{
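/* nothing to do here: the unlocked variant has no lock to tear down */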
}


@@ -1,78 +0,0 @@
/*
* Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
*/
#ifndef __NETMAP_MBQ_H__
#define __NETMAP_MBQ_H__
/*
* These function implement an mbuf tailq with an optional lock.
* The base functions act ONLY ON THE QUEUE, whereas the "safe"
* variants (mbq_safe_*) also handle the lock.
*/
/* XXX probably rely on a previous definition of SPINLOCK_T */
#ifdef linux
#define SPINLOCK_T safe_spinlock_t
#else
#define SPINLOCK_T struct mtx
#endif
/* A FIFO queue of mbufs with an optional lock. */
struct mbq {
struct mbuf *head;
struct mbuf *tail;
int count;
SPINLOCK_T lock;
};
/* XXX "destroy" does not match "init" as a name.
* We should also clarify whether init can be used while
* holding a lock, and whether mbq_safe_destroy() is a NOP.
*/
void mbq_init(struct mbq *q);
void mbq_destroy(struct mbq *q);
void mbq_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_dequeue(struct mbq *q);
void mbq_purge(struct mbq *q);
/* XXX missing mbq_lock() and mbq_unlock */
void mbq_safe_init(struct mbq *q);
void mbq_safe_destroy(struct mbq *q);
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_safe_dequeue(struct mbq *q);
void mbq_safe_purge(struct mbq *q);
static inline unsigned int mbq_len(struct mbq *q)
{
return q->count;
}
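/*
 * Minimal usage sketch of the "safe" variants (illustrative only,
 * allocation of 'm' and error handling omitted):
 *
 *	struct mbq q;
 *	struct mbuf *m;
 *
 *	mbq_safe_init(&q);		// init queue and spinlock
 *	mbq_safe_enqueue(&q, m);	// producer side
 *	while ((m = mbq_safe_dequeue(&q)) != NULL)
 *		m_freem(m);		// consumer drains the queue
 *	mbq_safe_destroy(&q);		// release the lock
 */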
#endif /* __NETMAP_MBQ_H__ */

File diff suppressed because it is too large


@@ -1,227 +0,0 @@
/*
* Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/dev/netmap/netmap_mem2.c 234290 2012-04-14 16:44:18Z luigi $
*
* (New) memory allocator for netmap
*/
/*
* This allocator creates three memory pools:
* nm_if_pool for the struct netmap_if
* nm_ring_pool for the struct netmap_ring
* nm_buf_pool for the packet buffers.
*
* that contain netmap objects. Each pool is made of a number of clusters,
* multiple of a page size, each containing an integer number of objects.
* The clusters are contiguous in user space but not in the kernel.
* Only nm_buf_pool needs to be dma-able,
* but for convenience use the same type of allocator for all.
*
* Once mapped, the three pools are exported to userspace
* as a contiguous block, starting from nm_if_pool. Each
* cluster (and pool) is an integral number of pages.
* [ . . . ][ . . . . . .][ . . . . . . . . . .]
* nm_if nm_ring nm_buf
*
* The userspace areas contain offsets of the objects in userspace.
* When (at init time) we write these offsets, we find out the index
* of the object, and from there locate the offset from the beginning
* of the region.
*
* The individual allocators manage a pool of memory for objects of
* the same size.
* The pool is split into smaller clusters, whose size is a
* multiple of the page size. The cluster size is chosen
* to minimize the waste for a given max cluster size
* (we do it by brute force, as we have relatively few objects
* per cluster).
*
* Objects are aligned to the cache line (64 bytes) rounding up object
* sizes when needed. A bitmap contains the state of each object.
* Allocation scans the bitmap; this is done only on attach, so we are not
* too worried about performance
*
* For each allocator we can define (through sysctl) the size and
* number of each object. Memory is allocated at the first use of a
* netmap file descriptor, and can be freed when all such descriptors
* have been released (including unmapping the memory).
* If memory is scarce, the system tries to get as much as possible
* and the sysctl values reflect the actual allocation.
* Together with desired values, the sysctl export also absolute
* min and maximum values that cannot be overridden.
*
* struct netmap_if:
* variable size, max 16 bytes per ring pair plus some fixed amount.
* 1024 bytes should be large enough in practice.
*
* In the worst case we have one netmap_if per ring in the system.
*
* struct netmap_ring
* variable size, 8 byte per slot plus some fixed amount.
* Rings can be large (e.g. 4k slots, or >32Kbytes).
* We default to 36 KB (9 pages), and a few hundred rings.
*
* struct netmap_buffer
* The more the better, both because fast interfaces tend to have
* many slots, and because we may want to use buffers to store
* packets in userspace avoiding copies.
* Must contain a full frame (eg 1518, or more for vlans, jumbo
* frames etc.) plus be nicely aligned, plus some NICs restrict
* the size to multiple of 1K or so. Default to 2K
*/
#ifndef _NET_NETMAP_MEM2_H_
#define _NET_NETMAP_MEM2_H_
#define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */
#define NETMAP_POOL_MAX_NAMSZ 32
enum {
NETMAP_IF_POOL = 0,
NETMAP_RING_POOL,
NETMAP_BUF_POOL,
NETMAP_POOLS_NR
};
struct netmap_obj_params {
u_int size;
u_int num;
};
struct netmap_obj_pool {
char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */
/* ---------------------------------------------------*/
/* these are only meaningful if the pool is finalized */
/* (see 'finalized' field in netmap_mem_d) */
u_int objtotal; /* actual total number of objects. */
u_int memtotal; /* actual total memory space */
u_int numclusters; /* actual number of clusters */
u_int objfree; /* number of free objects. */
struct lut_entry *lut; /* virt,phys addresses, objtotal entries */
uint32_t *bitmap; /* one bit per buffer, 1 means free */
uint32_t bitmap_slots; /* number of uint32 entries in bitmap */
/* ---------------------------------------------------*/
/* limits */
u_int objminsize; /* minimum object size */
u_int objmaxsize; /* maximum object size */
u_int nummin; /* minimum number of objects */
u_int nummax; /* maximum number of objects */
/* these are changed only by config */
u_int _objtotal; /* total number of objects */
u_int _objsize; /* object size */
u_int _clustsize; /* cluster size */
u_int _clustentries; /* objects per cluster */
u_int _numclusters; /* number of clusters */
/* requested values */
u_int r_objtotal;
u_int r_objsize;
};
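/*
 * Illustrative sketch (not in-tree code): with the config fields
 * above, the offset of object 'i' from the start of a finalized
 * pool could be computed as follows; the function name is made up:
 *
 *	static inline size_t
 *	netmap_obj_ofs_sketch(const struct netmap_obj_pool *p, u_int i)
 *	{
 *		u_int clust = i / p->_clustentries;	// cluster index
 *		u_int slot = i % p->_clustentries;	// index in cluster
 *		return (size_t)clust * p->_clustsize +
 *			(size_t)slot * p->_objsize;
 *	}
 */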
#ifdef linux
// XXX a mtx would suffice here 20130415 lr
#define NMA_LOCK_T struct semaphore
#else /* !linux */
#define NMA_LOCK_T struct mtx
#endif /* linux */
typedef int (*netmap_mem_config_t)(struct netmap_mem_d*);
typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*);
typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*);
typedef uint16_t nm_memid_t;
/* We implement two kinds of netmap_mem_d structures:
*
* - global: used by hardware NICS;
*
* - private: used by VALE ports.
*
* In both cases, the netmap_mem_d structure has the same lifetime as the
* netmap_adapter of the corresponding NIC or port. It is the responsibility of
* the client code to delete the private allocator when the associated
* netmap_adapter is freed (this is implemented by the NAF_MEM_OWNER flag in
* netmap.c). The 'refcount' field counts the number of active users of the
* structure. The global allocator uses this information to prevent/allow
* reconfiguration. The private allocators release all their memory when there
* are no active users. By 'active user' we mean an existing netmap_priv
* structure holding a reference to the allocator.
*/
struct netmap_mem_d {
NMA_LOCK_T nm_mtx; /* protect the allocator */
u_int nm_totalsize; /* shorthand */
u_int flags;
#define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */
#define NETMAP_MEM_PRIVATE 0x2 /* uses private address space */
int lasterr; /* last error for curr config */
int refcount; /* existing priv structures */
/* the three allocators */
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
netmap_mem_config_t config;
netmap_mem_finalize_t finalize;
netmap_mem_deref_t deref;
nm_memid_t nm_id; /* allocator identifier */
/* list of all existing allocators, sorted by nm_id */
struct netmap_mem_d *prev, *next;
};
extern struct netmap_mem_d nm_mem;
vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
int netmap_mem_finalize(struct netmap_mem_d *);
int netmap_mem_init(void);
void netmap_mem_fini(void);
struct netmap_if *
netmap_mem_if_new(const char *, struct netmap_adapter *);
void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
void netmap_mem_deref(struct netmap_mem_d *);
int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new(const char *name,
u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes,
int* error);
void netmap_mem_private_delete(struct netmap_mem_d *);
#define NETMAP_BDG_BUF_SIZE(n) ((n)->pools[NETMAP_BUF_POOL]._objsize)
uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);
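/*
 * Illustrative lifecycle sketch for a private allocator (e.g. for a
 * VALE port), using only the prototypes above; the ring/slot counts
 * and the error handling are placeholders, not in-tree code:
 *
 *	int err;
 *	struct netmap_mem_d *nmd;
 *
 *	nmd = netmap_mem_private_new("myport", txr, txd, rxr, rxd,
 *		0, 0, &err);			// no extra bufs, no pipes
 *	if (nmd == NULL)
 *		return err;
 *	err = netmap_mem_finalize(nmd);		// preallocate the pools
 *	...
 *	netmap_mem_deref(nmd);			// drop our reference
 *	netmap_mem_private_delete(nmd);		// when the adapter goes away
 */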
#endif


@@ -1,401 +0,0 @@
/*
* Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* $FreeBSD: head/sys/dev/netmap/netmap_offloadings.c 261909 2014-02-15 04:53:04Z luigi $ */
#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/sockio.h>
#include <sys/socketvar.h> /* struct socket */
#include <sys/socket.h> /* sockaddrs */
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/endian.h>
#elif defined(linux)
#include "bsd_glue.h"
#elif defined(__APPLE__)
#warning OSX support is only partial
#include "osx_glue.h"
#else
#error Unsupported platform
#endif /* unsupported */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
/* This routine is called by bdg_mismatch_datapath() when it finishes
* accumulating bytes for a segment, in order to fix some fields in the
* segment headers (which still contain the same content as the header
* of the original GSO packet). 'buf' points to the beginning (e.g.
* the ethernet header) of the segment, and 'len' is its length.
*/
static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
u_int segmented_bytes, u_int last_segment,
u_int tcp, u_int iphlen)
{
struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
uint16_t *check = NULL;
uint8_t *check_data = NULL;
if (iphlen == 20) {
/* Set the IPv4 "Total Length" field. */
iph->tot_len = htobe16(len-14);
ND("ip total length %u", be16toh(ip->tot_len));
/* Set the IPv4 "Identification" field. */
iph->id = htobe16(be16toh(iph->id) + idx);
ND("ip identification %u", be16toh(iph->id));
/* Compute and insert the IPv4 header checksum. */
iph->check = 0;
iph->check = nm_csum_ipv4(iph);
ND("IP csum %x", be16toh(iph->check));
} else {/* if (iphlen == 40) */
/* Set the IPv6 "Payload Len" field. */
ip6h->payload_len = htobe16(len-14-iphlen);
}
if (tcp) {
struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
/* Set the TCP sequence number. */
tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
ND("tcp seq %u", be32toh(tcph->seq));
/* Zero the PSH and FIN TCP flags if this is not the last
segment. */
if (!last_segment)
tcph->flags &= ~(0x8 | 0x1);
ND("last_segment %u", last_segment);
check = &tcph->check;
check_data = (uint8_t *)tcph;
} else { /* UDP */
struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
/* Set the UDP 'Length' field. */
udph->len = htobe16(len-14-iphlen);
check = &udph->check;
check_data = (uint8_t *)udph;
}
/* Compute and insert TCP/UDP checksum. */
*check = 0;
if (iphlen == 20)
nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
else
nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
ND("TCP/UDP csum %x", be16toh(*check));
}
/* The VALE mismatch datapath implementation. */
void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
struct netmap_vp_adapter *dst_na,
struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
u_int *j, u_int lim, u_int *howmany)
{
struct netmap_slot *slot = NULL;
struct nm_vnet_hdr *vh = NULL;
/* Number of source slots to process. */
u_int frags = ft_p->ft_frags;
struct nm_bdg_fwd *ft_end = ft_p + frags;
/* Source and destination pointers. */
uint8_t *dst, *src;
size_t src_len, dst_len;
u_int j_start = *j;
u_int dst_slots = 0;
/* If the source port uses the offloadings while the destination doesn't,
* we grab the source virtio-net header and do the offloadings here.
*/
if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
}
/* Init source and dest pointers. */
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
slot = &ring->slot[*j];
dst = BDG_NMB(&dst_na->up, slot);
dst_len = src_len;
/* We are processing the first input slot and there is a mismatch
* between source and destination virt_hdr_len (SHL and DHL).
* When a client is using virtio-net headers, the header length
* can be:
* - 10: the header corresponds to the struct nm_vnet_hdr
* - 12: the first 10 bytes correspond to the struct
* virtio_net_hdr, and the last 2 bytes store the
* "mergeable buffers" info, which is an optional
* hint that can be zeroed for compatibility
*
* The destination header is therefore built according to the
* following table:
*
* SHL | DHL | destination header
* -----------------------------
* 0 | 10 | zero
* 0 | 12 | zero
* 10 | 0 | doesn't exist
* 10 | 12 | first 10 bytes are copied from source header, last 2 are zero
* 12 | 0 | doesn't exist
* 12 | 10 | copied from the first 10 bytes of source header
*/
bzero(dst, dst_na->virt_hdr_len);
if (na->virt_hdr_len && dst_na->virt_hdr_len)
memcpy(dst, src, sizeof(struct nm_vnet_hdr));
/* Skip the virtio-net headers. */
src += na->virt_hdr_len;
src_len -= na->virt_hdr_len;
dst += dst_na->virt_hdr_len;
dst_len = dst_na->virt_hdr_len + src_len;
/* Here it could be dst_len == 0 (which implies src_len == 0),
* so we avoid passing a zero length fragment.
*/
if (dst_len == 0) {
ft_p++;
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
dst_len = src_len;
}
if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
u_int gso_bytes = 0;
/* Length of the GSO packet header. */
u_int gso_hdr_len = 0;
/* Pointer to the GSO packet header. Assume it is in a single fragment. */
uint8_t *gso_hdr = NULL;
/* Index of the current segment. */
u_int gso_idx = 0;
/* Payload data bytes segmented so far (e.g. TCP data bytes). */
u_int segmented_bytes = 0;
/* Length of the IP header (20 if IPv4, 40 if IPv6). */
u_int iphlen = 0;
/* Is this a TCP or a UDP GSO packet? */
u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
/* Segment the GSO packet contained into the input slots (frags). */
while (ft_p != ft_end) {
size_t copy;
/* Grab the GSO header if we don't have it. */
if (!gso_hdr) {
uint16_t ethertype;
gso_hdr = src;
/* Look at the 'Ethertype' field to see if this packet
* is IPv4 or IPv6.
*/
ethertype = be16toh(*((uint16_t *)(gso_hdr + 12)));
if (ethertype == 0x0800)
iphlen = 20;
else /* if (ethertype == 0x86DD) */
iphlen = 40;
ND(3, "type=%04x", ethertype);
/* Compute gso_hdr_len. For TCP we need to read the
* content of the 'Data Offset' field.
*/
if (tcp) {
struct nm_tcphdr *tcph =
(struct nm_tcphdr *)&gso_hdr[14+iphlen];
gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
} else
gso_hdr_len = 14 + iphlen + 8; /* UDP */
ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
dst_na->mfs);
/* Advance source pointers. */
src += gso_hdr_len;
src_len -= gso_hdr_len;
if (src_len == 0) {
ft_p++;
if (ft_p == ft_end)
break;
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
continue;
}
}
/* Fill in the header of the current segment. */
if (gso_bytes == 0) {
memcpy(dst, gso_hdr, gso_hdr_len);
gso_bytes = gso_hdr_len;
}
/* Fill in data and update source and dest pointers. */
copy = src_len;
if (gso_bytes + copy > dst_na->mfs)
copy = dst_na->mfs - gso_bytes;
memcpy(dst + gso_bytes, src, copy);
gso_bytes += copy;
src += copy;
src_len -= copy;
/* A segment is complete or we have processed all the
GSO payload bytes. */
if (gso_bytes >= dst_na->mfs ||
(src_len == 0 && ft_p + 1 == ft_end)) {
/* After raw segmentation, we must fix some header
* fields and compute checksums, in a protocol dependent
* way. */
gso_fix_segment(dst, gso_bytes, gso_idx,
segmented_bytes,
src_len == 0 && ft_p + 1 == ft_end,
tcp, iphlen);
ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
slot->len = gso_bytes;
slot->flags = 0;
segmented_bytes += gso_bytes - gso_hdr_len;
dst_slots++;
/* Next destination slot. */
*j = nm_next(*j, lim);
slot = &ring->slot[*j];
dst = BDG_NMB(&dst_na->up, slot);
gso_bytes = 0;
gso_idx++;
}
/* Next input slot. */
if (src_len == 0) {
ft_p++;
if (ft_p == ft_end)
break;
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
}
}
ND(3, "%d bytes segmented", segmented_bytes);
} else {
/* Address of a checksum field into a destination slot. */
uint16_t *check = NULL;
/* Accumulator for an unfolded checksum. */
rawsum_t csum = 0;
/* Process a non-GSO packet. */
/* Init 'check' if necessary. */
if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
if (unlikely(vh->csum_offset + vh->csum_start > src_len))
D("invalid checksum request");
else
check = (uint16_t *)(dst + vh->csum_start +
vh->csum_offset);
}
while (ft_p != ft_end) {
/* Init/update the packet checksum if needed. */
if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
if (!dst_slots)
csum = nm_csum_raw(src + vh->csum_start,
src_len - vh->csum_start, 0);
else
csum = nm_csum_raw(src, src_len, csum);
}
/* Round to a multiple of 64 */
src_len = (src_len + 63) & ~63;
if (ft_p->ft_flags & NS_INDIRECT) {
if (copyin(src, dst, src_len)) {
/* Invalid user pointer, pretend len is 0. */
dst_len = 0;
}
} else {
memcpy(dst, src, (int)src_len);
}
slot->len = dst_len;
dst_slots++;
/* Next destination slot. */
*j = nm_next(*j, lim);
slot = &ring->slot[*j];
dst = BDG_NMB(&dst_na->up, slot);
/* Next source slot. */
ft_p++;
src = ft_p->ft_buf;
dst_len = src_len = ft_p->ft_len;
}
/* Finalize (fold) the checksum if needed. */
if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
*check = nm_csum_fold(csum);
}
ND(3, "using %u dst_slots", dst_slots);
/* A second pass on the destination slots to set the slot flags,
* using the right number of destination slots.
*/
while (j_start != *j) {
slot = &ring->slot[j_start];
slot->flags = (dst_slots << 8)| NS_MOREFRAG;
j_start = nm_next(j_start, lim);
}
/* Clear NS_MOREFRAG flag on last entry. */
slot->flags = (dst_slots << 8);
}
/* Update howmany. */
if (unlikely(dst_slots > *howmany)) {
dst_slots = *howmany;
D("Slot allocation error: Should never happen");
}
*howmany -= dst_slots;
}
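/*
 * Worked example (illustrative): for a TCP/IPv4 GSO packet with a
 * standard header, gso_hdr_len = 14 + 20 + 20 = 54 bytes; with
 * dst_na->mfs = 1514 each output segment carries up to
 * 1514 - 54 = 1460 payload bytes, so a 4380-byte TCP payload is
 * segmented into exactly three full frames.
 */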


@@ -1,708 +0,0 @@
/*
* Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/socket.h> /* sockaddrs */
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/refcount.h>
#elif defined(linux)
#include "bsd_glue.h"
#elif defined(__APPLE__)
#warning OSX support is only partial
#include "osx_glue.h"
#else
#error Unsupported platform
#endif /* unsupported */
/*
* common headers
*/
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
#ifdef WITH_PIPES
#define NM_PIPE_MAXSLOTS 4096
int netmap_default_pipes = 0; /* default number of pipes for each nic */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
/* allocate the pipe array in the parent adapter */
int
netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr)
{
size_t len;
int mode = nmr->nr_flags & NR_REG_MASK;
u_int npipes;
if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) {
/* this is for our parent, not for us */
return 0;
}
/* TODO: we can resize the array if the new
* request can accommodate the already existing pipes
*/
if (na->na_pipes) {
nmr->nr_arg1 = na->na_max_pipes;
return 0;
}
npipes = nmr->nr_arg1;
if (npipes == 0)
npipes = netmap_default_pipes;
nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL);
if (npipes == 0) {
/* really zero, nothing to alloc */
goto out;
}
len = sizeof(struct netmap_pipe_adapter *) * npipes;
na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
if (na->na_pipes == NULL)
return ENOMEM;
na->na_max_pipes = npipes;
na->na_next_pipe = 0;
out:
nmr->nr_arg1 = npipes;
return 0;
}
/* deallocate the parent array in the parent adapter */
void
netmap_pipe_dealloc(struct netmap_adapter *na)
{
if (na->na_pipes) {
ND("freeing pipes for %s", NM_IFPNAME(na->ifp));
free(na->na_pipes, M_DEVBUF);
na->na_pipes = NULL;
na->na_max_pipes = 0;
na->na_next_pipe = 0;
}
}
/* find a pipe endpoint with the given id among the parent's pipes */
static struct netmap_pipe_adapter *
netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id)
{
int i;
struct netmap_pipe_adapter *na;
for (i = 0; i < parent->na_next_pipe; i++) {
na = parent->na_pipes[i];
if (na->id == pipe_id) {
return na;
}
}
return NULL;
}
/* add a new pipe endpoint to the parent array */
static int
netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
if (parent->na_next_pipe >= parent->na_max_pipes) {
D("%s: no space left for pipes", NM_IFPNAME(parent->ifp));
return ENOMEM;
}
parent->na_pipes[parent->na_next_pipe] = na;
na->parent_slot = parent->na_next_pipe;
parent->na_next_pipe++;
return 0;
}
/* remove the given pipe endpoint from the parent array */
static void
netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
u_int n;
n = --parent->na_next_pipe;
if (n != na->parent_slot) {
parent->na_pipes[na->parent_slot] =
parent->na_pipes[n];
}
parent->na_pipes[n] = NULL;
}
static int
netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
{
struct netmap_kring *rxkring = txkring->pipe;
u_int limit; /* slots to transfer */
u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
lim_rx = rxkring->nkr_num_slots - 1;
int m, busy;
ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail);
j = rxkring->nr_hwtail; /* RX */
k = txkring->nr_hwcur; /* TX */
m = txkring->rhead - txkring->nr_hwcur; /* new slots */
if (m < 0)
m += txkring->nkr_num_slots;
limit = m;
m = rxkring->nkr_num_slots - 1; /* max avail space on destination */
busy = j - rxkring->nr_hwcur; /* busy slots */
if (busy < 0)
busy += rxkring->nkr_num_slots; /* wrap in the rx ring */
m -= busy; /* subtract busy slots */
ND(2, "m %d limit %d", m, limit);
if (m < limit)
limit = m;
if (limit == 0) {
/* either the rxring is full, or nothing to send */
nm_txsync_finalize(txkring); /* actually useless */
return 0;
}
while (limit-- > 0) {
struct netmap_slot *rs = &rxkring->save_ring->slot[j];
struct netmap_slot *ts = &txkring->ring->slot[k];
struct netmap_slot tmp;
/* swap the slots */
tmp = *rs;
*rs = *ts;
*ts = tmp;
/* no need to report the buffer change */
j = nm_next(j, lim_rx);
k = nm_next(k, lim_tx);
}
wmb(); /* make sure the slots are updated before publishing them */
rxkring->nr_hwtail = j;
txkring->nr_hwcur = k;
txkring->nr_hwtail = nm_prev(k, lim_tx);
nm_txsync_finalize(txkring);
ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail, j);
wmb(); /* make sure rxkring->nr_hwtail is updated before notifying */
rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0);
return 0;
}
static int
netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
{
struct netmap_kring *txkring = rxkring->pipe;
uint32_t oldhwcur = rxkring->nr_hwcur;
ND("%s %x <- %s", rxkring->name, flags, txkring->name);
rxkring->nr_hwcur = rxkring->rhead; /* recover user-released slots */
ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
rxkring->rcur, rxkring->rhead, rxkring->rtail);
rmb(); /* paired with the first wmb() in txsync */
nm_rxsync_finalize(rxkring);
if (oldhwcur != rxkring->nr_hwcur) {
/* we have released some slots, notify the other end */
wmb(); /* make sure nr_hwcur is updated before notifying */
txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0);
}
return 0;
}
/* Pipe endpoints are created and destroyed together, so that endpoints do not
* have to check for the existence of their peer at each ?xsync.
*
* To play well with the existing netmap infrastructure (refcounts etc.), we
* adopt the following strategy:
*
* 1) The first endpoint that is created also creates the other endpoint and
* grabs a reference to it.
*
* state A) user1 --> endpoint1 --> endpoint2
*
* 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
* its reference to the user:
*
* state B) user1 --> endpoint1 endpoint2 <--- user2
*
* 3) Assume that, starting from state B endpoint2 is closed. In the unregister
* callback endpoint2 notes that endpoint1 is still active and adds a reference
* from endpoint1 to itself. When user2 then releases her own reference,
* endpoint2 is not destroyed and we are back to state A. A symmetrical state
* would be reached if endpoint1 were released instead.
*
* 4) If, starting from state A, endpoint1 is closed, the destructor notes that
* it owns a reference to endpoint2 and releases it.
*
* Something similar goes on for the creation and destruction of the krings.
*/
/* netmap_pipe_krings_create.
*
* There are two cases:
*
* 1) state is
*
* usr1 --> e1 --> e2
*
* and we are e1. We have to create both sets
* of krings.
*
* 2) state is
*
* usr1 --> e1 --> e2
*
* and we are e2. e1 is certainly registered and our
* krings already exist, but they may be hidden.
*/
static int
netmap_pipe_krings_create(struct netmap_adapter *na)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona = &pna->peer->up;
int error = 0;
if (pna->peer_ref) {
int i;
/* case 1) above */
D("%p: case 1, create everything", na);
error = netmap_krings_create(na, 0);
if (error)
goto err;
/* we also create all the rings, since we need to
* update the save_ring pointers.
* netmap_mem_rings_create (called by our caller)
* will not create the rings again
*/
error = netmap_mem_rings_create(na);
if (error)
goto del_krings1;
/* update our hidden ring pointers */
for (i = 0; i < na->num_tx_rings + 1; i++)
na->tx_rings[i].save_ring = na->tx_rings[i].ring;
for (i = 0; i < na->num_rx_rings + 1; i++)
na->rx_rings[i].save_ring = na->rx_rings[i].ring;
/* now, create krings and rings of the other end */
error = netmap_krings_create(ona, 0);
if (error)
goto del_rings1;
error = netmap_mem_rings_create(ona);
if (error)
goto del_krings2;
for (i = 0; i < ona->num_tx_rings + 1; i++)
ona->tx_rings[i].save_ring = ona->tx_rings[i].ring;
for (i = 0; i < ona->num_rx_rings + 1; i++)
ona->rx_rings[i].save_ring = ona->rx_rings[i].ring;
/* cross link the krings */
for (i = 0; i < na->num_tx_rings; i++) {
na->tx_rings[i].pipe = pna->peer->up.rx_rings + i;
na->rx_rings[i].pipe = pna->peer->up.tx_rings + i;
pna->peer->up.tx_rings[i].pipe = na->rx_rings + i;
pna->peer->up.rx_rings[i].pipe = na->tx_rings + i;
}
} else {
int i;
/* case 2) above */
/* recover the hidden rings */
ND("%p: case 2, hidden rings", na);
for (i = 0; i < na->num_tx_rings + 1; i++)
na->tx_rings[i].ring = na->tx_rings[i].save_ring;
for (i = 0; i < na->num_rx_rings + 1; i++)
na->rx_rings[i].ring = na->rx_rings[i].save_ring;
}
return 0;
del_krings2:
netmap_krings_delete(ona);
del_rings1:
netmap_mem_rings_delete(na);
del_krings1:
netmap_krings_delete(na);
err:
return error;
}
/* netmap_pipe_reg.
*
* There are two cases on registration (onoff==1)
*
* 1.a) state is
*
* usr1 --> e1 --> e2
*
* and we are e1. Nothing special to do.
*
* 1.b) state is
*
* usr1 --> e1 --> e2 <-- usr2
*
* and we are e2. Drop the ref e1 is holding.
*
* There are two additional cases on unregister (onoff==0)
*
* 2.a) state is
*
* usr1 --> e1 --> e2
*
* and we are e1. Nothing special to do, e2 will
* be cleaned up by the destructor of e1.
*
* 2.b) state is
*
* usr1 --> e1 e2 <-- usr2
*
* and we are either e1 or e2. Add a ref from the
* other end and hide our rings.
*/
static int
netmap_pipe_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
struct ifnet *ifp = na->ifp;
ND("%p: onoff %d", na, onoff);
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
} else {
ifp->if_capenable &= ~IFCAP_NETMAP;
}
if (pna->peer_ref) {
ND("%p: case 1.a or 2.a, nothing to do", na);
return 0;
}
if (onoff) {
ND("%p: case 1.b, drop peer", na);
pna->peer->peer_ref = 0;
netmap_adapter_put(na);
} else {
int i;
ND("%p: case 2.b, grab peer", na);
netmap_adapter_get(na);
pna->peer->peer_ref = 1;
/* hide our rings from netmap_mem_rings_delete */
for (i = 0; i < na->num_tx_rings + 1; i++) {
na->tx_rings[i].ring = NULL;
}
for (i = 0; i < na->num_rx_rings + 1; i++) {
na->rx_rings[i].ring = NULL;
}
}
return 0;
}
/* netmap_pipe_krings_delete.
*
* There are two cases:
*
* 1) state is
*
* usr1 --> e1 --> e2
*
* and we are e1 (e2 is not registered, so krings_delete cannot be
* called on it);
*
* 2) state is
*
* usr1 --> e1 e2 <-- usr2
*
* and we are either e1 or e2.
*
* In the former case we have to also delete the krings of e2;
* in the latter case we do nothing (note that our krings
* have already been hidden in the unregister callback).
*/
static void
netmap_pipe_krings_delete(struct netmap_adapter *na)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona; /* na of the other end */
int i;
if (!pna->peer_ref) {
ND("%p: case 2, kept alive by peer", na);
return;
}
/* case 1) above */
ND("%p: case 1, deleting everyhing", na);
netmap_krings_delete(na); /* also zeroes tx_rings etc. */
/* restore the ring to be deleted on the peer */
ona = &pna->peer->up;
if (ona->tx_rings == NULL) {
/* already deleted, we must be on a
* cleanup-after-error path */
return;
}
for (i = 0; i < ona->num_tx_rings + 1; i++)
ona->tx_rings[i].ring = ona->tx_rings[i].save_ring;
for (i = 0; i < ona->num_rx_rings + 1; i++)
ona->rx_rings[i].ring = ona->rx_rings[i].save_ring;
netmap_mem_rings_delete(ona);
netmap_krings_delete(ona);
}
static void
netmap_pipe_dtor(struct netmap_adapter *na)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
ND("%p", na);
if (pna->peer_ref) {
ND("%p: clean up peer", na);
pna->peer_ref = 0;
netmap_adapter_put(&pna->peer->up);
}
if (pna->role == NR_REG_PIPE_MASTER)
netmap_pipe_remove(pna->parent, pna);
netmap_adapter_put(pna->parent);
free(na->ifp, M_DEVBUF);
na->ifp = NULL;
pna->parent = NULL;
}
int
netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
struct nmreq pnmr;
struct netmap_adapter *pna; /* parent adapter */
struct netmap_pipe_adapter *mna, *sna, *req;
struct ifnet *ifp, *ifp2;
u_int pipe_id;
int role = nmr->nr_flags & NR_REG_MASK;
int error;
ND("flags %x", nmr->nr_flags);
if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) {
ND("not a pipe");
return 0;
}
role = nmr->nr_flags & NR_REG_MASK;
/* first, try to find the parent adapter */
bzero(&pnmr, sizeof(pnmr));
memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
/* pass to parent the requested number of pipes */
pnmr.nr_arg1 = nmr->nr_arg1;
error = netmap_get_na(&pnmr, &pna, create);
if (error) {
ND("parent lookup failed: %d", error);
return error;
}
ND("found parent: %s", NM_IFPNAME(pna->ifp));
if (NETMAP_OWNED_BY_KERN(pna)) {
ND("parent busy");
error = EBUSY;
goto put_out;
}
/* next, lookup the pipe id in the parent list */
req = NULL;
pipe_id = nmr->nr_ringid & NETMAP_RING_MASK;
mna = netmap_pipe_find(pna, pipe_id);
if (mna) {
if (mna->role == role) {
ND("found %d directly at %d", pipe_id, mna->parent_slot);
req = mna;
} else {
ND("found %d indirectly at %d", pipe_id, mna->parent_slot);
req = mna->peer;
}
/* the pipe we have found already holds a ref to the parent,
* so we need to drop the one we got from netmap_get_na()
*/
netmap_adapter_put(pna);
goto found;
}
ND("pipe %d not found, create %d", pipe_id, create);
if (!create) {
error = ENODEV;
goto put_out;
}
/* we create both master and slave.
* The endpoint we were asked for holds a reference to
* the other one.
*/
ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
if (!ifp) {
error = ENOMEM;
goto put_out;
}
strcpy(ifp->if_xname, NM_IFPNAME(pna->ifp));
mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (mna == NULL) {
error = ENOMEM;
goto free_ifp;
}
mna->up.ifp = ifp;
mna->id = pipe_id;
mna->role = NR_REG_PIPE_MASTER;
mna->parent = pna;
mna->up.nm_txsync = netmap_pipe_txsync;
mna->up.nm_rxsync = netmap_pipe_rxsync;
mna->up.nm_register = netmap_pipe_reg;
mna->up.nm_dtor = netmap_pipe_dtor;
mna->up.nm_krings_create = netmap_pipe_krings_create;
mna->up.nm_krings_delete = netmap_pipe_krings_delete;
mna->up.nm_mem = pna->nm_mem;
mna->up.na_lut = pna->na_lut;
mna->up.na_lut_objtotal = pna->na_lut_objtotal;
mna->up.num_tx_rings = 1;
mna->up.num_rx_rings = 1;
mna->up.num_tx_desc = nmr->nr_tx_slots;
nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
1, NM_PIPE_MAXSLOTS, NULL);
mna->up.num_rx_desc = nmr->nr_rx_slots;
nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
1, NM_PIPE_MAXSLOTS, NULL);
error = netmap_attach_common(&mna->up);
if (error)
goto free_mna;
/* register the master with the parent */
error = netmap_pipe_add(pna, mna);
if (error)
goto free_mna;
/* create the slave */
ifp2 = malloc(sizeof(*ifp2), M_DEVBUF, M_NOWAIT | M_ZERO);
if (!ifp2) {
error = ENOMEM;
goto free_mna;
}
strcpy(ifp2->if_xname, NM_IFPNAME(pna->ifp));
sna = malloc(sizeof(*sna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (sna == NULL) {
error = ENOMEM;
goto free_ifp2;
}
/* most fields are the same, copy from master and then fix */
*sna = *mna;
sna->up.ifp = ifp2;
sna->role = NR_REG_PIPE_SLAVE;
error = netmap_attach_common(&sna->up);
if (error)
goto free_sna;
/* join the two endpoints */
mna->peer = sna;
sna->peer = mna;
/* we already have a reference to the parent, but we
* need another one for the other endpoint we created
*/
netmap_adapter_get(pna);
if (role == NR_REG_PIPE_MASTER) {
req = mna;
mna->peer_ref = 1;
netmap_adapter_get(&sna->up);
} else {
req = sna;
sna->peer_ref = 1;
netmap_adapter_get(&mna->up);
}
ND("created master %p and slave %p", mna, sna);
found:
ND("pipe %d %s at %p", pipe_id,
(req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req);
*na = &req->up;
netmap_adapter_get(*na);
/* write the configuration back */
nmr->nr_tx_rings = req->up.num_tx_rings;
nmr->nr_rx_rings = req->up.num_rx_rings;
nmr->nr_tx_slots = req->up.num_tx_desc;
nmr->nr_rx_slots = req->up.num_rx_desc;
/* keep the reference to the parent.
* It will be released by the req destructor
*/
return 0;
free_sna:
free(sna, M_DEVBUF);
free_ifp2:
free(ifp2, M_DEVBUF);
free_mna:
free(mna, M_DEVBUF);
free_ifp:
free(ifp, M_DEVBUF);
put_out:
netmap_adapter_put(pna);
return error;
}
#endif /* WITH_PIPES */

File diff suppressed because it is too large


@@ -1,20 +0,0 @@
# $FreeBSD$
#
# Compile netmap as a module, useful if you want a netmap bridge
# or loadable drivers.
.PATH: ${.CURDIR}/../../dev/netmap
.PATH.h: ${.CURDIR}/../../net
CFLAGS += -I${.CURDIR}/../../
KMOD = netmap
SRCS = device_if.h bus_if.h opt_netmap.h
SRCS += netmap.c netmap.h netmap_kern.h
SRCS += netmap_mem2.c netmap_mem2.h
SRCS += netmap_generic.c
SRCS += netmap_mbq.c netmap_mbq.h
SRCS += netmap_vale.c
SRCS += netmap_freebsd.c
SRCS += netmap_offloadings.c
SRCS += netmap_pipe.c
.include <bsd.kmod.mk>


@@ -1,550 +0,0 @@
/*
* Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $
*
* Definitions of constants and the structures used by the netmap
* framework, for the part visible to both kernel and userspace.
* Detailed info on netmap is available with "man netmap" or at
*
* http://info.iet.unipi.it/~luigi/netmap/
*
* This API is also used to communicate with the VALE software switch
*/
#ifndef _NET_NETMAP_H_
#define _NET_NETMAP_H_
#define NETMAP_API 11 /* current API version */
#define NETMAP_MIN_API 11 /* min and max versions accepted */
#define NETMAP_MAX_API 15
/*
* Some fields should be cache-aligned to reduce contention.
* The alignment is architecture and OS dependent, but rather than
* digging into OS headers to find the exact value we use an estimate
* that should cover most architectures.
*/
#define NM_CACHE_ALIGN 128
/*
* --- Netmap data structures ---
*
* The userspace data structures used by netmap are shown below.
* They are allocated by the kernel and mmap()ed by userspace threads.
* Pointers are implemented as memory offsets or indexes,
* so that they can be easily dereferenced in kernel and userspace.
KERNEL (opaque, obviously)
====================================================================
|
USERSPACE | struct netmap_ring
+---->+---------------+
/ | head,cur,tail |
struct netmap_if (nifp, 1 per fd) / | buf_ofs |
+---------------+ / | other fields |
| ni_tx_rings | / +===============+
| ni_rx_rings | / | buf_idx, len | slot[0]
| | / | flags, ptr |
| | / +---------------+
+===============+ / | buf_idx, len | slot[1]
| txring_ofs[0] | (rel.to nifp)--' | flags, ptr |
| txring_ofs[1] | +---------------+
(tx+1 entries) (num_slots entries)
| txring_ofs[t] | | buf_idx, len | slot[n-1]
+---------------+ | flags, ptr |
| rxring_ofs[0] | +---------------+
| rxring_ofs[1] |
(rx+1 entries)
| rxring_ofs[r] |
+---------------+
* For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
* a file descriptor, the mmap()ed region contains a (logically readonly)
* struct netmap_if pointing to struct netmap_ring's.
*
* There is one netmap_ring per physical NIC ring, plus one tx/rx ring
* pair attached to the host stack (this pair is unused for non-NIC ports).
*
* All physical/host stack ports share the same memory region,
* so that zero-copy can be implemented between them.
* VALE switch ports instead have separate memory regions.
*
* The netmap_ring is the userspace-visible replica of the NIC ring.
* Each slot has the index of a buffer (MTU-sized and residing in the
* mmapped region), its length and some flags. An extra 64-bit pointer
* is provided for user-supplied buffers in the tx path.
*
* In user space, the buffer address is computed as
* (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
*
* Added in NETMAP_API 11:
*
* + NIOCREGIF can request the allocation of extra spare buffers from
* the same memory pool. The desired number of buffers must be in
* nr_arg3. The ioctl may return fewer buffers, depending on memory
* availability. nr_arg3 will return the actual value, and, once
* mapped, nifp->ni_bufs_head will be the index of the first buffer.
*
* The buffers are linked to each other using the first uint32_t
* as the index. On close, ni_bufs_head must point to the list of
* buffers to be released.
*
* + NIOCREGIF can request space for extra rings (and buffers)
* allocated in the same memory space. The number of extra rings
* is in nr_arg1, and is advisory. This is a no-op on NICs where
* the size of the memory space is fixed.
*
* + NIOCREGIF can attach to PIPE rings sharing the same memory
* space with a parent device. The ifname indicates the parent device,
* which must already exist. Flags in nr_flags indicate if we want to
* bind the master or slave side, the index (from nr_ringid)
* is just a cookie and does not need to be sequential.
*
* + NIOCREGIF can also attach to 'monitor' rings that replicate
* the content of specific rings, also from the same memory space.
*
* Extra flags in nr_flags support the above functions.
* Application libraries may use the following naming scheme:
* netmap:foo all NIC ring pairs
* netmap:foo^ only host ring pair
* netmap:foo+ all NIC ring + host ring pairs
* netmap:foo-k the k-th NIC ring pair
* netmap:foo{k PIPE ring pair k, master side
* netmap:foo}k PIPE ring pair k, slave side
*/
/*
* struct netmap_slot is a buffer descriptor
*/
struct netmap_slot {
uint32_t buf_idx; /* buffer index */
uint16_t len; /* length for this slot */
uint16_t flags; /* buf changed, etc. */
uint64_t ptr; /* pointer for indirect buffers */
};
/*
* The following flags control how the slot is used
*/
#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */
/*
* must be set whenever buf_idx is changed (as it might be
* necessary to recompute the physical address and mapping)
*/
#define NS_REPORT 0x0002 /* ask the hardware to report results */
/*
* Request notification when slot is used by the hardware.
* Normally transmit completions are handled lazily and
* may be unreported. This flag lets us know when a slot
* has been sent (e.g. to terminate the sender).
*/
#define NS_FORWARD 0x0004 /* pass packet 'forward' */
/*
* (Only for physical ports, rx rings with NR_FORWARD set).
* Slot released to the kernel (i.e. before ring->head) with
* this flag set are passed to the peer ring (host/NIC),
* thus restoring the host-NIC connection for these slots.
* This supports efficient traffic monitoring or firewalling.
*/
#define NS_NO_LEARN 0x0008 /* disable bridge learning */
/*
* On a VALE switch, do not 'learn' the source port for
* this buffer.
*/
#define NS_INDIRECT 0x0010 /* userspace buffer */
/*
* (VALE tx rings only) data is in a userspace buffer,
* whose address is in the 'ptr' field in the slot.
*/
#define NS_MOREFRAG 0x0020 /* packet has more fragments */
/*
* (VALE ports only)
* Set on all but the last slot of a multi-segment packet.
* The 'len' field refers to the individual fragment.
*/
#define NS_PORT_SHIFT 8
#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
/*
* The high 8 bits of the flag, if not zero, indicate the
* destination port for the VALE switch, overriding
* the lookup table.
*/
#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff)
/*
* (VALE rx rings only) the high 8 bits
* are the number of fragments.
*/
/*
* struct netmap_ring
*
* Netmap representation of a TX or RX ring (also known as "queue").
* This is a queue implemented as a fixed-size circular array.
* At the software level the important fields are: head, cur, tail.
*
* In TX rings:
*
* head first slot available for transmission.
* cur wakeup point. select() and poll() will unblock
* when 'tail' moves past 'cur'
* tail (readonly) first slot reserved to the kernel
*
* [head .. tail-1] can be used for new packets to send;
* 'head' and 'cur' must be incremented as slots are filled
* with new packets to be sent;
* 'cur' can be moved further ahead if we need more space
* for new transmissions.
*
* In RX rings:
*
* head first valid received packet
* cur wakeup point. select() and poll() will unblock
* when 'tail' moves past 'cur'
* tail (readonly) first slot reserved to the kernel
*
* [head .. tail-1] contain received packets;
* 'head' and 'cur' must be incremented as slots are consumed
* and can be returned to the kernel;
* 'cur' can be moved further ahead if we want to wait for
* new packets without returning the previous ones.
*
* DATA OWNERSHIP/LOCKING:
* The netmap_ring, and all slots and buffers in the range
* [head .. tail-1] are owned by the user program;
* the kernel only accesses them during a netmap system call
* and in the user thread context.
*
* Other slots and buffers are reserved for use by the kernel
*/
struct netmap_ring {
/*
* buf_ofs is meant to be used through macros.
* It contains the offset of the buffer region from this
* descriptor.
*/
const int64_t buf_ofs;
const uint32_t num_slots; /* number of slots in the ring. */
const uint32_t nr_buf_size;
const uint16_t ringid;
const uint16_t dir; /* 0: tx, 1: rx */
uint32_t head; /* (u) first user slot */
uint32_t cur; /* (u) wakeup point */
uint32_t tail; /* (k) first kernel slot */
uint32_t flags;
struct timeval ts; /* (k) time of last *sync() */
/* opaque room for a mutex or similar object */
uint8_t sem[128] __attribute__((__aligned__(NM_CACHE_ALIGN)));
/* the slots follow. This struct has variable size */
struct netmap_slot slot[0]; /* array of slots. */
};
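/*
 * Illustrative sketch, not part of the original header: filling one
 * slot of a tx ring according to the head/cur/tail rules documented
 * above. The buffer address follows the formula
 * (char *)ring + buf_ofs + buf_idx * nr_buf_size; userspace normally
 * uses the NETMAP_BUF() macro from net/netmap_user.h for this.
 * "example_tx_one" is a hypothetical helper.
 */
static inline int
example_tx_one(struct netmap_ring *ring, const char *payload, uint16_t len)
{
	uint32_t i = ring->head;
	char *buf;
	uint16_t k;

	if (i == ring->tail || len > ring->nr_buf_size)
		return 0;	/* no space left, or oversized payload */
	buf = (char *)ring + ring->buf_ofs +
	    (int64_t)ring->slot[i].buf_idx * ring->nr_buf_size;
	for (k = 0; k < len; k++)	/* byte copy, avoids <string.h> */
		buf[k] = payload[k];
	ring->slot[i].len = len;
	/* advance head and cur past the slot; the next txsync pushes it out */
	ring->head = ring->cur = (i + 1 == ring->num_slots) ? 0 : i + 1;
	return 1;
}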
/*
* RING FLAGS
*/
#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
/*
* updates the 'ts' field on each netmap syscall. This saves
* a separate gettimeofday(), and is not much worse than
* software timestamps generated in the interrupt handler.
*/
#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */
/*
* Enables the NS_FORWARD slot flag for the ring.
*/
/*
* Netmap representation of an interface and its queue(s).
* This is initialized by the kernel when binding a file
* descriptor to a port, and should be considered as readonly
* by user programs. The kernel never uses it.
*
* There is one netmap_if for each file descriptor on which we want
* to select/poll.
* select/poll operates on one or all pairs depending on the value of
* nmr_queueid passed on the ioctl.
*/
struct netmap_if {
char ni_name[IFNAMSIZ]; /* name of the interface. */
const uint32_t ni_version; /* API version, currently unused */
const uint32_t ni_flags; /* properties */
#define NI_PRIV_MEM 0x1 /* private memory region */
/*
* The number of packet rings available in netmap mode.
* Physical NICs can have different numbers of tx and rx rings.
* Physical NICs also have a 'host' ring pair.
* Additionally, clients can request additional ring pairs to
* be used for internal communication.
*/
const uint32_t ni_tx_rings; /* number of HW tx rings */
const uint32_t ni_rx_rings; /* number of HW rx rings */
uint32_t ni_bufs_head; /* head index for extra bufs */
uint32_t ni_spare1[5];
/*
* The following array contains the offset of each netmap ring
* from this structure, in the following order:
* NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings;
* NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings.
*
* The area is filled up by the kernel on NIOCREGIF,
* and then only read by userspace code.
*/
const ssize_t ring_ofs[0];
};
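/*
 * Illustrative sketch, not part of the original header: resolving an
 * rx ring from the offsets in ring_ofs[], following the ordering
 * documented above (NIC tx rings, then the host tx ring, then rx
 * rings). Userspace normally uses the NETMAP_TXRING()/NETMAP_RXRING()
 * macros from net/netmap_user.h; "example_rxring" is hypothetical.
 */
static inline struct netmap_ring *
example_rxring(struct netmap_if *nifp, uint32_t i)
{
	/* skip the ni_tx_rings NIC tx offsets plus one host tx offset */
	return (struct netmap_ring *)(void *)((char *)nifp +
	    nifp->ring_ofs[i + nifp->ni_tx_rings + 1]);
}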
#ifndef NIOCREGIF
/*
* ioctl names and related fields
*
* NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
* whose identity is set in NIOCREGIF through nr_ringid.
* These are non blocking and take no argument.
*
* NIOCGINFO takes a struct ifreq, the interface name is the input,
* the outputs are number of queues and number of descriptor
* for each queue (useful to set number of threads etc.).
* The info returned is only advisory and may change before
* the interface is bound to a file descriptor.
*
* NIOCREGIF takes an interface name within a struct nmreq,
* and activates netmap mode on the interface (if possible).
*
* The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we
* can pass it down to other NIC-related ioctls.
*
* The actual argument (struct nmreq) has a number of options to request
* different functions.
* The following are used in NIOCREGIF when nr_cmd == 0:
*
* nr_name (in)
* The name of the port (em0, valeXXX:YYY, etc.)
* limited to IFNAMSIZ for backward compatibility.
*
* nr_version (in/out)
* Must match NETMAP_API as used in the kernel, error otherwise.
* Always returns the desired value on output.
*
* nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out)
* On input, non-zero values may be used to reconfigure the port
* according to the requested values, but this is not guaranteed.
* On output the actual values in use are reported.
*
* nr_ringid (in)
* Indicates how rings should be bound to the file descriptors.
* If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
* are used to indicate the ring number, and nr_flags specifies
* the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
*
* NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED:
* If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control
* the binding as follows:
* 0 (default) binds all physical rings
* NETMAP_HW_RING | ring number binds a single ring pair
* NETMAP_SW_RING binds only the host tx/rx rings
*
* NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push
* packets on tx rings only if POLLOUT is set.
* The default is to push any pending packet.
*
* NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release
* packets on rx rings also when POLLIN is NOT set.
* The default is to touch the rx ring only with POLLIN.
* Note that this is the opposite of TX because it
* reflects the common usage.
*
* NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead.
* NETMAP_PRIV_MEM is set on return for ports that do not use
* the global memory allocator.
* This information is not significant and applications
* should look at the region id in nr_arg2
*
* nr_flags is the recommended mode to indicate which rings should
* be bound to a file descriptor. Values are NR_REG_*
*
* nr_arg1 (in) The number of extra rings to be reserved.
* Especially when allocating a VALE port the system only
* allocates the amount of memory needed for the port.
* If more shared memory rings are desired (e.g. for pipes),
* the first invocation for the same basename/allocator
* should specify a suitable number. Memory cannot be
* extended after the first allocation without closing
* all ports on the same region.
*
* nr_arg2 (in/out) The identity of the memory region used.
* On input, 0 means the system decides autonomously,
* other values may try to select a specific region.
* On return the actual value is reported.
* Region '1' is the global allocator, normally shared
* by all interfaces. Other values are private regions.
* If two ports use the same region, zero-copy is possible.
*
* nr_arg3 (in/out) number of extra buffers to be allocated.
*
*
*
* nr_cmd (in) if non-zero indicates a special command:
* NETMAP_BDG_ATTACH and nr_name = vale*:ifname
* attaches the NIC to the switch; nr_ringid specifies
* which rings to use. Used by vale-ctl -a ...
* nr_arg1 = NETMAP_BDG_HOST also attaches the host port
* as in vale-ctl -h ...
*
* NETMAP_BDG_DETACH and nr_name = vale*:ifname
* disconnects a previously attached NIC.
* Used by vale-ctl -d ...
*
* NETMAP_BDG_LIST
* list the configuration of VALE switches.
*
* NETMAP_BDG_VNET_HDR
* Set the virtio-net header length used by the client
* of a VALE switch port.
*
* nr_arg1, nr_arg2, nr_arg3 (in/out) command specific
*
*
*
*/
/*
* struct nmreq overlays a struct ifreq (just the name)
*
* On input, nr_ringid indicates which rings we are requesting,
* with the low flags for the specific ring number.
* selection FLAGS RING INDEX
*
* all the NIC rings 0x0000 -
* only HOST ring 0x2000 -
* single NIC ring 0x4000 ring index
* all the NIC+HOST rings 0x6000 -
* one pipe ring, master 0x8000 ring index
* *** INVALID 0xA000
* one pipe ring, slave 0xC000 ring index
* *** INVALID 0xE000
*
*/
struct nmreq {
char nr_name[IFNAMSIZ];
uint32_t nr_version; /* API version */
uint32_t nr_offset; /* nifp offset in the shared region */
uint32_t nr_memsize; /* size of the shared region */
uint32_t nr_tx_slots; /* slots in tx rings */
uint32_t nr_rx_slots; /* slots in rx rings */
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
uint16_t nr_ringid; /* ring(s) we care about */
#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */
#define NETMAP_SW_RING 0x2000 /* only host ring pair */
#define NETMAP_RING_MASK 0x0fff /* the ring number */
#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */
uint16_t nr_cmd;
#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
#define NETMAP_BDG_DETACH 2 /* detach the NIC */
#define NETMAP_BDG_LOOKUP_REG 3 /* register lookup function */
#define NETMAP_BDG_LIST 4 /* get bridge's info */
#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */
#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */
uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */
#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
uint16_t nr_arg2;
uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */
uint32_t nr_flags;
/* various modes, extends nr_ringid */
uint32_t spare2[1];
};
#define NR_REG_MASK 0xf /* values for nr_flags */
enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
NR_REG_ALL_NIC = 1,
NR_REG_SW = 2,
NR_REG_NIC_SW = 3,
NR_REG_ONE_NIC = 4,
NR_REG_PIPE_MASTER = 5,
NR_REG_PIPE_SLAVE = 6,
};
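/*
 * Illustrative sketch, not part of the original header: encoding a
 * request for a single NIC ring pair with the nr_flags/nr_ringid
 * scheme documented above. "example_request_one_ring" is a
 * hypothetical helper.
 */
static inline void
example_request_one_ring(struct nmreq *req, uint16_t ring)
{
	req->nr_flags = (req->nr_flags & ~NR_REG_MASK) | NR_REG_ONE_NIC;
	/* the ring index goes in the low bits of nr_ringid */
	req->nr_ringid = (req->nr_ringid & ~NETMAP_RING_MASK) |
	    (ring & NETMAP_RING_MASK);
}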
/* monitor uses the NR_REG to select the rings to monitor */
#define NR_MONITOR_TX 0x100
#define NR_MONITOR_RX 0x200
/*
* FreeBSD uses the size value embedded in the _IOWR to determine
* how much to copy in/out. So we need it to match the actual
* data structure we pass. We put some spares in the structure
* to ease compatibility with other versions
*/
#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
#endif /* !NIOCREGIF */
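/*
 * Illustrative sketch, not part of the original header: querying a
 * port with NIOCGINFO as documented above. Assumes <fcntl.h>,
 * <string.h>, <sys/ioctl.h> and <unistd.h> are available;
 * "example_query_port" is a hypothetical helper, not a netmap API.
 */
static inline int
example_query_port(const char *name, struct nmreq *req)
{
	int fd = open("/dev/netmap", O_RDWR);
	int ret;

	if (fd < 0)
		return -1;
	memset(req, 0, sizeof(*req));
	strncpy(req->nr_name, name, sizeof(req->nr_name) - 1);
	req->nr_version = NETMAP_API;
	ret = ioctl(fd, NIOCGINFO, req); /* fills advisory ring/slot counts */
	close(fd);
	return ret;
}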
/*
* Helper functions for kernel and userspace
*/
/*
* check if space is available in the ring.
*/
static inline int
nm_ring_empty(struct netmap_ring *ring)
{
return (ring->cur == ring->tail);
}
#endif /* _NET_NETMAP_H_ */


@@ -1,677 +0,0 @@
/*
* Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
*
* Functions and macros to manipulate netmap structures and packets
* in userspace. See netmap(4) for more information.
*
* The address of the struct netmap_if, say nifp, is computed from the
* value returned from ioctl(.., NIOCREG, ...) and the mmap region:
* ioctl(fd, NIOCREG, &req);
* mem = mmap(0, ... );
* nifp = NETMAP_IF(mem, req.nr_nifp);
* (so simple, we could just do it manually)
*
* From there:
* struct netmap_ring *NETMAP_TXRING(nifp, index)
* struct netmap_ring *NETMAP_RXRING(nifp, index)
* we can access ring->nr_cur, ring->nr_avail, ring->nr_flags
*
* ring->slot[i] gives us the i-th slot (we can access
* directly len, flags, buf_idx)
*
* char *buf = NETMAP_BUF(ring, x) returns a pointer to
* the buffer numbered x
*
* All ring indexes (head, cur, tail) should always move forward.
* To compute the next index in a circular ring you can use
* i = nm_ring_next(ring, i);
*
* To ease porting apps from pcap to netmap we supply a few functions
* that can be called to open, close, read and write on netmap in a way
* similar to libpcap. Note that the read/write function depend on
* an ioctl()/select()/poll() being issued to refill rings or push
* packets out.
*
* In order to use these, include #define NETMAP_WITH_LIBS
* in the source file that invokes these functions.
*/
#ifndef _NET_NETMAP_USER_H_
#define _NET_NETMAP_USER_H_
#include <stdint.h>
#include <sys/socket.h> /* apple needs sockaddr */
#include <net/if.h> /* IFNAMSIZ */
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif /* likely and unlikely */
#include <net/netmap.h>
/* helper macro */
#define _NETMAP_OFFSET(type, ptr, offset) \
((type)(void *)((char *)(ptr) + (offset)))
#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs)
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
nifp, (nifp)->ring_ofs[index] )
#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )
#define NETMAP_BUF(ring, index) \
((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
#define NETMAP_BUF_IDX(ring, buf) \
( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
(ring)->nr_buf_size )
static inline uint32_t
nm_ring_next(struct netmap_ring *r, uint32_t i)
{
return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
}
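/*
 * Illustrative sketch, not part of the original header: walking the
 * received slots of an rx ring, per the [head..tail-1] ownership rule
 * described in net/netmap.h. "example_count_rx_bytes" is hypothetical.
 */
static inline uint32_t
example_count_rx_bytes(struct netmap_ring *ring)
{
	uint32_t total = 0;
	uint32_t i;

	for (i = ring->head; i != ring->tail; i = nm_ring_next(ring, i)) {
		/* payload would be at NETMAP_BUF(ring, ring->slot[i].buf_idx) */
		total += ring->slot[i].len;
	}
	return total;
}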
/*
* Return 1 if we have pending transmissions in the tx ring.
* When everything is complete ring->head = ring->tail + 1 (modulo ring size)
*/
static inline int
nm_tx_pending(struct netmap_ring *r)
{
return nm_ring_next(r, r->tail) != r->head;
}
static inline uint32_t
nm_ring_space(struct netmap_ring *ring)
{
int ret = ring->tail - ring->cur;
if (ret < 0)
ret += ring->num_slots;
return ret;
}
#ifdef NETMAP_WITH_LIBS
/*
* Support for simple I/O libraries.
* Include other system headers required for compiling this.
*/
#ifndef HAVE_NETMAP_WITH_LIBS
#define HAVE_NETMAP_WITH_LIBS
#include <sys/time.h>
#include <sys/mman.h>
#include <string.h> /* memset */
#include <sys/ioctl.h>
#include <sys/errno.h> /* EINVAL */
#include <fcntl.h> /* O_RDWR */
#include <unistd.h> /* close() */
#include <signal.h>
#include <stdlib.h>
#ifndef ND /* debug macros */
/* debug support */
#define ND(_fmt, ...) do {} while(0)
#define D(_fmt, ...) \
do { \
struct timeval t0; \
gettimeofday(&t0, NULL); \
fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \
(int)(t0.tv_sec % 1000), (int)t0.tv_usec, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
/* Rate limited version of "D", lps indicates how many per second */
#define RD(lps, format, ...) \
do { \
static int t0, __cnt; \
struct timeval __xxts; \
gettimeofday(&__xxts, NULL); \
if (t0 != __xxts.tv_sec) { \
t0 = __xxts.tv_sec; \
__cnt = 0; \
} \
if (__cnt++ < lps) { \
D(format, ##__VA_ARGS__); \
} \
} while (0)
#endif
struct nm_pkthdr { /* same as pcap_pkthdr */
struct timeval ts;
uint32_t caplen;
uint32_t len;
};
struct nm_stat { /* same as pcap_stat */
u_int ps_recv;
u_int ps_drop;
u_int ps_ifdrop;
#ifdef WIN32
u_int bs_capt;
#endif /* WIN32 */
};
#define NM_ERRBUF_SIZE 512
struct nm_desc {
struct nm_desc *self; /* point to self if netmap. */
int fd;
void *mem;
int memsize;
int done_mmap; /* set if mem is the result of mmap */
struct netmap_if * const nifp;
uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
struct nmreq req; /* also contains the nr_name = ifname */
struct nm_pkthdr hdr;
/*
* The memory contains netmap_if, rings and then buffers.
* Given a pointer (e.g. to nm_inject) we can compare with
* mem/buf_start/buf_end to tell if it is a buffer or
* some other descriptor in our region.
* We also store a pointer to some ring as it helps in the
* translation from buffer indexes to addresses.
*/
struct netmap_ring * const some_ring;
void * const buf_start;
void * const buf_end;
/* parameters from pcap_open_live */
int snaplen;
int promisc;
int to_ms;
char *errbuf;
/* save flags so we can restore them on close */
uint32_t if_flags;
uint32_t if_reqcap;
uint32_t if_curcap;
struct nm_stat st;
char msg[NM_ERRBUF_SIZE];
};
/*
* when the descriptor is open correctly, d->self == d
* Eventually we should also use some magic number.
*/
#define P2NMD(p) ((struct nm_desc *)(p))
#define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d) (P2NMD(d)->fd)
/*
* this is a slightly optimized copy routine which rounds
* to multiple of 64 bytes and is often faster than dealing
* with other odd sizes. We assume there is enough room
* in the source and destination buffers.
*
* XXX only for multiples of 64 bytes, non overlapped.
*/
static inline void
nm_pkt_copy(const void *_src, void *_dst, int l)
{
const uint64_t *src = (const uint64_t *)_src;
uint64_t *dst = (uint64_t *)_dst;
if (unlikely(l >= 1024)) {
memcpy(dst, src, l);
return;
}
for (; likely(l > 0); l-=64) {
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
/*
* The callback, invoked on each received packet. Same as libpcap
*/
typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
/*
*--- the pcap-like API ---
*
* nm_open() opens a file descriptor, binds to a port and maps memory.
*
* ifname (netmap:foo or vale:foo) is the port name
* a suffix can indicate the following:
* ^ bind the host (sw) ring pair
* * bind host and NIC ring pairs (transparent)
* -NN bind individual NIC ring pair
* {NN bind master side of pipe NN
* }NN bind slave side of pipe NN
*
* req provides the initial values of nmreq before parsing ifname.
* Remember that the ifname parsing will override the ring
* number in nm_ringid, and part of nm_flags;
* flags special functions, normally 0
* indicates which fields of *arg are significant
* arg special functions, normally NULL
* if passed a netmap_desc with mem != NULL,
* use that memory instead of mmap.
*/
static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req,
uint64_t flags, const struct nm_desc *arg);
/*
* nm_open can import some fields from the parent descriptor.
* These flags control which ones.
* Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL,
* which set the initial value for these flags.
* Note that the 16 low bits of the flags are reserved for data
* that may go into the nmreq.
*/
enum {
NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */
NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */
NM_OPEN_ARG1 = 0x100000,
NM_OPEN_ARG2 = 0x200000,
NM_OPEN_ARG3 = 0x400000,
NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */
};
/*
* nm_close() closes and restores the port to its previous state
*/
static int nm_close(struct nm_desc *);
/*
* nm_inject() is the same as pcap_inject()
* nm_dispatch() is the same as pcap_dispatch()
* nm_nextpkt() is the same as pcap_next()
*/
static int nm_inject(struct nm_desc *, const void *, size_t);
static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);
/*
* Try to open, return descriptor if successful, NULL otherwise.
* An invalid netmap name will return errno = 0;
* You can pass a pointer to a pre-filled nm_desc to add special
* parameters. Flags is used as follows
* NM_OPEN_NO_MMAP use the memory from arg, only
* if the nr_arg2 (memory block) matches.
* NM_OPEN_ARG1 use req.nr_arg1 from arg
* NM_OPEN_ARG2 use req.nr_arg2 from arg
* NM_OPEN_RING_CFG user ring config from arg
*/
static struct nm_desc *
nm_open(const char *ifname, const struct nmreq *req,
uint64_t new_flags, const struct nm_desc *arg)
{
struct nm_desc *d = NULL;
const struct nm_desc *parent = arg;
u_int namelen;
uint32_t nr_ringid = 0, nr_flags;
const char *port = NULL;
const char *errmsg = NULL;
if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
errno = 0; /* name not recognised, not an error */
return NULL;
}
if (ifname[0] == 'n')
ifname += 7;
/* scan for a separator */
for (port = ifname; *port && !index("-*^{}", *port); port++)
;
namelen = port - ifname;
if (namelen >= sizeof(d->req.nr_name)) {
errmsg = "name too long";
goto fail;
}
switch (*port) {
default: /* '\0', no suffix */
nr_flags = NR_REG_ALL_NIC;
break;
case '-': /* one NIC */
nr_flags = NR_REG_ONE_NIC;
nr_ringid = atoi(port + 1);
break;
case '*': /* NIC and SW, ignore port */
nr_flags = NR_REG_NIC_SW;
if (port[1]) {
errmsg = "invalid port for nic+sw";
goto fail;
}
break;
case '^': /* only sw ring */
nr_flags = NR_REG_SW;
if (port[1]) {
errmsg = "invalid port for sw ring";
goto fail;
}
break;
case '{':
nr_flags = NR_REG_PIPE_MASTER;
nr_ringid = atoi(port + 1);
break;
case '}':
nr_flags = NR_REG_PIPE_SLAVE;
nr_ringid = atoi(port + 1);
break;
}
if (nr_ringid >= NETMAP_RING_MASK) {
errmsg = "invalid ringid";
goto fail;
}
/* add the *XPOLL flags */
nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
d = (struct nm_desc *)calloc(1, sizeof(*d));
if (d == NULL) {
errmsg = "nm_desc alloc failure";
errno = ENOMEM;
return NULL;
}
d->self = d; /* set this early so nm_close() works */
d->fd = open("/dev/netmap", O_RDWR);
if (d->fd < 0) {
errmsg = "cannot open /dev/netmap";
goto fail;
}
if (req)
d->req = *req;
d->req.nr_version = NETMAP_API;
d->req.nr_ringid &= ~NETMAP_RING_MASK;
/* these fields are overridden by ifname and flags processing */
d->req.nr_ringid |= nr_ringid;
d->req.nr_flags = nr_flags;
memcpy(d->req.nr_name, ifname, namelen);
d->req.nr_name[namelen] = '\0';
/* optionally import info from parent */
if (IS_NETMAP_DESC(parent) && new_flags) {
if (new_flags & NM_OPEN_ARG1)
D("overriding ARG1 %d", parent->req.nr_arg1);
d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ?
parent->req.nr_arg1 : 4;
if (new_flags & NM_OPEN_ARG2)
D("overriding ARG2 %d", parent->req.nr_arg2);
d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
parent->req.nr_arg2 : 0;
if (new_flags & NM_OPEN_ARG3)
D("overriding ARG3 %d", parent->req.nr_arg3);
d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
parent->req.nr_arg3 : 0;
if (new_flags & NM_OPEN_RING_CFG) {
D("overriding RING_CFG");
d->req.nr_tx_slots = parent->req.nr_tx_slots;
d->req.nr_rx_slots = parent->req.nr_rx_slots;
d->req.nr_tx_rings = parent->req.nr_tx_rings;
d->req.nr_rx_rings = parent->req.nr_rx_rings;
}
if (new_flags & NM_OPEN_IFNAME) {
D("overriding ifname %s ringid 0x%x flags 0x%x",
parent->req.nr_name, parent->req.nr_ringid,
parent->req.nr_flags);
memcpy(d->req.nr_name, parent->req.nr_name,
sizeof(d->req.nr_name));
d->req.nr_ringid = parent->req.nr_ringid;
d->req.nr_flags = parent->req.nr_flags;
}
}
if (ioctl(d->fd, NIOCREGIF, &d->req)) {
errmsg = "NIOCREGIF failed";
goto fail;
}
if (IS_NETMAP_DESC(parent) && parent->mem &&
parent->req.nr_arg2 == d->req.nr_arg2) {
/* do not mmap, inherit from parent */
d->memsize = parent->memsize;
d->mem = parent->mem;
} else {
d->memsize = d->req.nr_memsize;
d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
d->fd, 0);
if (d->mem == MAP_FAILED) { /* mmap failure is MAP_FAILED, not NULL */
errmsg = "mmap failed";
goto fail;
}
d->done_mmap = 1;
}
{
struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
struct netmap_ring *r = NETMAP_RXRING(nifp, 0);
*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
*(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
*(void **)(uintptr_t)&d->buf_end =
(char *)d->mem + d->memsize;
}
if (nr_flags == NR_REG_SW) { /* host stack */
d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
} else if (nr_flags == NR_REG_ALL_NIC) { /* only nic */
d->first_tx_ring = 0;
d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings - 1;
d->last_rx_ring = d->req.nr_rx_rings - 1;
} else if (nr_flags == NR_REG_NIC_SW) {
d->first_tx_ring = 0;
d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings;
d->last_rx_ring = d->req.nr_rx_rings;
} else if (nr_flags == NR_REG_ONE_NIC) {
/* XXX check validity */
d->first_tx_ring = d->last_tx_ring =
d->first_rx_ring = d->last_rx_ring = nr_ringid;
} else { /* pipes */
d->first_tx_ring = d->last_tx_ring = 0;
d->first_rx_ring = d->last_rx_ring = 0;
}
#ifdef DEBUG_NETMAP_USER
{ /* debugging code */
int i;
D("%s tx %d .. %d %d rx %d .. %d %d", ifname,
d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings,
d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings);
for (i = 0; i <= d->req.nr_tx_rings; i++) {
struct netmap_ring *r = NETMAP_TXRING(d->nifp, i);
D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
}
for (i = 0; i <= d->req.nr_rx_rings; i++) {
struct netmap_ring *r = NETMAP_RXRING(d->nifp, i);
D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
}
}
#endif /* debugging */
d->cur_tx_ring = d->first_tx_ring;
d->cur_rx_ring = d->first_rx_ring;
return d;
fail:
nm_close(d);
if (errmsg)
D("%s %s", errmsg, ifname);
errno = EINVAL;
return NULL;
}
static int
nm_close(struct nm_desc *d)
{
/*
* ugly trick to avoid unused warnings
*/
static void *__xxzt[] __attribute__ ((unused)) =
{ (void *)nm_open, (void *)nm_inject,
(void *)nm_dispatch, (void *)nm_nextpkt } ;
if (d == NULL || d->self != d)
return EINVAL;
if (d->done_mmap && d->mem)
munmap(d->mem, d->memsize);
if (d->fd != -1)
close(d->fd);
bzero(d, sizeof(*d));
free(d);
return 0;
}
/*
* Same prototype as pcap_inject(), only need to cast.
*/
static int
nm_inject(struct nm_desc *d, const void *buf, size_t size)
{
u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
for (c = 0; c < n ; c++) {
/* compute current ring to use */
struct netmap_ring *ring;
uint32_t i, idx;
uint32_t ri = d->cur_tx_ring + c;
if (ri > d->last_tx_ring)
ri = d->first_tx_ring;
ring = NETMAP_TXRING(d->nifp, ri);
if (nm_ring_empty(ring)) {
continue;
}
i = ring->cur;
idx = ring->slot[i].buf_idx;
ring->slot[i].len = size;
nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
d->cur_tx_ring = ri;
ring->head = ring->cur = nm_ring_next(ring, i);
return size;
}
return 0; /* fail */
}
/*
* Same prototype as pcap_dispatch(), only need to cast.
*/
static int
nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
{
int n = d->last_rx_ring - d->first_rx_ring + 1;
int c, got = 0, ri = d->cur_rx_ring;
if (cnt == 0)
cnt = -1;
/* cnt == -1 means infinite, but rings have a finite amount
* of buffers and the int is large enough that we never wrap,
* so we can omit checking for -1
*/
for (c=0; c < n && cnt != got; c++) {
/* compute current ring to use */
struct netmap_ring *ring;
ri = d->cur_rx_ring + c;
if (ri > d->last_rx_ring)
ri = d->first_rx_ring;
ring = NETMAP_RXRING(d->nifp, ri);
for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
u_int i = ring->cur;
u_int idx = ring->slot[i].buf_idx;
u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
// __builtin_prefetch(buf);
d->hdr.len = d->hdr.caplen = ring->slot[i].len;
d->hdr.ts = ring->ts;
cb(arg, &d->hdr, buf);
ring->head = ring->cur = nm_ring_next(ring, i);
}
}
d->cur_rx_ring = ri;
return got;
}
static u_char *
nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr)
{
int ri = d->cur_rx_ring;
do {
/* compute current ring to use */
struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
if (!nm_ring_empty(ring)) {
u_int i = ring->cur;
u_int idx = ring->slot[i].buf_idx;
u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
// __builtin_prefetch(buf);
hdr->ts = ring->ts;
hdr->len = hdr->caplen = ring->slot[i].len;
ring->cur = nm_ring_next(ring, i);
/* we could postpone advancing head if we want
* to hold the buffer. This can be supported in
* the future.
*/
ring->head = ring->cur;
d->cur_rx_ring = ri;
return buf;
}
ri++;
if (ri > d->last_rx_ring)
ri = d->first_rx_ring;
} while (ri != d->cur_rx_ring);
return NULL; /* nothing found */
}
#endif /* !HAVE_NETMAP_WITH_LIBS */
#endif /* NETMAP_WITH_LIBS */
#endif /* _NET_NETMAP_USER_H_ */
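/*
 * Illustrative sketch, not part of the original sources: a minimal
 * standalone receive loop built on the pcap-like API above. "em0" is
 * a placeholder interface name (a suffix such as "netmap:em0-2"
 * would bind a single ring pair, per the rules documented in the
 * header); error handling is minimal.
 */
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>
#include <poll.h>
#include <stdio.h>

static void
example_cb(u_char *arg, const struct nm_pkthdr *h, const u_char *buf)
{
	(void)arg; (void)buf;
	printf("received %u bytes\n", h->len);
}

int
main(void)
{
	struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);
	struct pollfd pfd;

	if (d == NULL)
		return 1;	/* nm_open reports the reason via D() */
	pfd.fd = NETMAP_FD(d);
	pfd.events = POLLIN;
	for (;;) {
		poll(&pfd, 1, 1000);			/* refill rx rings */
		nm_dispatch(d, 0, example_cb, NULL);	/* 0: drain them all */
	}
	/* not reached; nm_close(d) would restore the interface state */
}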