- add netmap-libpcap
- add netmap (FreeBSD header files need to be updated with this) - move prototype perl scripts to prototype/ folder - create basic structure for sipcap app (no code yet)
This commit is contained in:
		
							
								
								
									
										550
									
								
								netmap/sys/net/netmap.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										550
									
								
								netmap/sys/net/netmap.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,550 @@ | ||||
| /* | ||||
|  * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. | ||||
|  * | ||||
|  * Redistribution and use in source and binary forms, with or without | ||||
|  * modification, are permitted provided that the following conditions | ||||
|  * are met: | ||||
|  * | ||||
|  *   1. Redistributions of source code must retain the above copyright | ||||
|  *      notice, this list of conditions and the following disclaimer. | ||||
|  *   2. Redistributions in binary form must reproduce the above copyright | ||||
|  *      notice, this list of conditions and the following disclaimer in the | ||||
|  *      documentation and/or other materials provided with the distribution. | ||||
|  * | ||||
|  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | ||||
|  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
|  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
|  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||||
|  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||
|  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||||
|  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||||
|  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||||
|  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||||
|  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||||
|  * SUCH DAMAGE. | ||||
|  */ | ||||
|  | ||||
| /* | ||||
|  * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ | ||||
|  * | ||||
|  * Definitions of constants and the structures used by the netmap | ||||
|  * framework, for the part visible to both kernel and userspace. | ||||
|  * Detailed info on netmap is available with "man netmap" or at | ||||
|  * | ||||
|  *	http://info.iet.unipi.it/~luigi/netmap/ | ||||
|  * | ||||
|  * This API is also used to communicate with the VALE software switch | ||||
|  */ | ||||
|  | ||||
| #ifndef _NET_NETMAP_H_ | ||||
| #define _NET_NETMAP_H_ | ||||
|  | ||||
| #define	NETMAP_API	11		/* current API version */ | ||||
|  | ||||
| #define	NETMAP_MIN_API	11		/* lowest API version accepted */ | ||||
| #define	NETMAP_MAX_API	15		/* highest API version accepted */ | ||||
| /* | ||||
|  * Some fields should be cache-aligned to reduce contention. | ||||
|  * The alignment is architecture and OS dependent, but rather than | ||||
|  * digging into OS headers to find the exact value we use an estimate | ||||
|  * that should cover most architectures. | ||||
|  */ | ||||
| #define NM_CACHE_ALIGN	128 | ||||
|  | ||||
| /* | ||||
|  * --- Netmap data structures --- | ||||
|  * | ||||
|  * The userspace data structures used by netmap are shown below. | ||||
|  * They are allocated by the kernel and mmap()ed by userspace threads. | ||||
|  * Pointers are implemented as memory offsets or indexes, | ||||
|  * so that they can be easily dereferenced in kernel and userspace. | ||||
|  | ||||
|    KERNEL (opaque, obviously) | ||||
|  | ||||
|   ==================================================================== | ||||
|                                          | | ||||
|    USERSPACE                             |      struct netmap_ring | ||||
|                                          +---->+---------------+ | ||||
|                                              / | head,cur,tail | | ||||
|    struct netmap_if (nifp, 1 per fd)        /  | buf_ofs       | | ||||
|     +---------------+                      /   | other fields  | | ||||
|     | ni_tx_rings   |                     /    +===============+ | ||||
|     | ni_rx_rings   |                    /     | buf_idx, len  | slot[0] | ||||
|     |               |                   /      | flags, ptr    | | ||||
|     |               |                  /       +---------------+ | ||||
|     +===============+                 /        | buf_idx, len  | slot[1] | ||||
|     | txring_ofs[0] | (rel.to nifp)--'         | flags, ptr    | | ||||
|     | txring_ofs[1] |                          +---------------+ | ||||
|      (tx+1 entries)                           (num_slots entries) | ||||
|     | txring_ofs[t] |                          | buf_idx, len  | slot[n-1] | ||||
|     +---------------+                          | flags, ptr    | | ||||
|     | rxring_ofs[0] |                          +---------------+ | ||||
|     | rxring_ofs[1] | | ||||
|      (rx+1 entries) | ||||
|     | rxring_ofs[r] | | ||||
|     +---------------+ | ||||
|  | ||||
|  * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to | ||||
|  * a file descriptor, the mmap()ed region contains a (logically readonly) | ||||
|  * struct netmap_if pointing to struct netmap_ring's. | ||||
|  * | ||||
|  * There is one netmap_ring per physical NIC ring, plus one tx/rx ring | ||||
|  * pair attached to the host stack (this pair is unused for non-NIC ports). | ||||
|  * | ||||
|  * All physical/host stack ports share the same memory region, | ||||
|  * so that zero-copy can be implemented between them. | ||||
|  * VALE switch ports instead have separate memory regions. | ||||
|  * | ||||
|  * The netmap_ring is the userspace-visible replica of the NIC ring. | ||||
|  * Each slot has the index of a buffer (MTU-sized and residing in the | ||||
|  * mmapped region), its length and some flags. An extra 64-bit pointer | ||||
|  * is provided for user-supplied buffers in the tx path. | ||||
|  * | ||||
|  * In user space, the buffer address is computed as | ||||
|  *	(char *)ring + buf_ofs + index * NETMAP_BUF_SIZE | ||||
|  * | ||||
|  * Added in NETMAP_API 11: | ||||
|  * | ||||
|  * + NIOCREGIF can request the allocation of extra spare buffers from | ||||
|  *   the same memory pool. The desired number of buffers must be in | ||||
|  *   nr_arg3. The ioctl may return fewer buffers, depending on memory | ||||
|  *   availability. nr_arg3 will return the actual value, and, once | ||||
|  *   mapped, nifp->ni_bufs_head will be the index of the first buffer. | ||||
|  * | ||||
|  *   The buffers are linked to each other using the first uint32_t | ||||
|  *   as the index. On close, ni_bufs_head must point to the list of | ||||
|  *   buffers to be released. | ||||
|  * | ||||
|  * + NIOCREGIF can request space for extra rings (and buffers) | ||||
|  *   allocated in the same memory space. The number of extra rings | ||||
|  *   is in nr_arg1, and is advisory. This is a no-op on NICs where | ||||
|  *   the size of the memory space is fixed. | ||||
|  * | ||||
|  * + NIOCREGIF can attach to PIPE rings sharing the same memory | ||||
|  *   space with a parent device. The ifname indicates the parent device, | ||||
|  *   which must already exist. Flags in nr_flags indicate if we want to | ||||
|  *   bind the master or slave side, the index (from nr_ringid) | ||||
|  *   is just a cookie and does not need to be sequential. | ||||
|  * | ||||
|  * + NIOCREGIF can also attach to 'monitor' rings that replicate | ||||
|  *   the content of specific rings, also from the same memory space. | ||||
|  * | ||||
|  *   Extra flags in nr_flags support the above functions. | ||||
|  *   Application libraries may use the following naming scheme: | ||||
|  *	netmap:foo			all NIC ring pairs | ||||
|  *	netmap:foo^			only host ring pair | ||||
|  *	netmap:foo+			all NIC ring + host ring pairs | ||||
|  *	netmap:foo-k			the k-th NIC ring pair | ||||
|  *	netmap:foo{k			PIPE ring pair k, master side | ||||
|  *	netmap:foo}k			PIPE ring pair k, slave side | ||||
|  */ | ||||
|  | ||||
| /* | ||||
|  * struct netmap_slot is a buffer descriptor | ||||
|  */ | ||||
| struct netmap_slot { | ||||
| 	uint32_t buf_idx;	/* index of the buffer attached to this slot */ | ||||
| 	uint16_t len;		/* length for this slot */ | ||||
| 	uint16_t flags;		/* NS_* flags (buf changed, etc.) */ | ||||
| 	uint64_t ptr;		/* pointer for indirect (userspace) buffers */ | ||||
| }; | ||||
|  | ||||
| /* | ||||
|  * The following flags control how the slot is used | ||||
|  */ | ||||
|  | ||||
| #define	NS_BUF_CHANGED	0x0001	/* buf_idx changed */ | ||||
| 	/* | ||||
| 	 * must be set whenever buf_idx is changed (as it might be | ||||
| 	 * necessary to recompute the physical address and mapping) | ||||
| 	 */ | ||||
|  | ||||
| #define	NS_REPORT	0x0002	/* ask the hardware to report results */ | ||||
| 	/* | ||||
| 	 * Request notification when slot is used by the hardware. | ||||
| 	 * Normally transmit completions are handled lazily and | ||||
| 	 * may be unreported. This flag lets us know when a slot | ||||
| 	 * has been sent (e.g. to terminate the sender). | ||||
| 	 */ | ||||
|  | ||||
| #define	NS_FORWARD	0x0004	/* pass packet 'forward' */ | ||||
| 	/* | ||||
| 	 * (Only for physical ports, rx rings with NR_FORWARD set). | ||||
| 	 * Slots released to the kernel (i.e. before ring->head) with | ||||
| 	 * this flag set are passed to the peer ring (host/NIC), | ||||
| 	 * thus restoring the host-NIC connection for these slots. | ||||
| 	 * This supports efficient traffic monitoring or firewalling. | ||||
| 	 */ | ||||
|  | ||||
| #define	NS_NO_LEARN	0x0008	/* disable bridge learning */ | ||||
|  	/* | ||||
| 	 * On a VALE switch, do not 'learn' the source port for | ||||
|  	 * this buffer. | ||||
| 	 */ | ||||
|  | ||||
| #define	NS_INDIRECT	0x0010	/* userspace buffer */ | ||||
|  	/* | ||||
| 	 * (VALE tx rings only) data is in a userspace buffer, | ||||
| 	 * whose address is in the 'ptr' field in the slot. | ||||
| 	 */ | ||||
|  | ||||
| #define	NS_MOREFRAG	0x0020	/* packet has more fragments */ | ||||
|  	/* | ||||
| 	 * (VALE ports only) | ||||
| 	 * Set on all but the last slot of a multi-segment packet. | ||||
| 	 * The 'len' field refers to the individual fragment. | ||||
| 	 */ | ||||
|  | ||||
| #define	NS_PORT_SHIFT	8 | ||||
| #define	NS_PORT_MASK	(0xff << NS_PORT_SHIFT) | ||||
| 	/* | ||||
|  	 * The high 8 bits of the flag, if not zero, indicate the | ||||
| 	 * destination port for the VALE switch, overriding | ||||
|  	 * the lookup table. | ||||
|  	 */ | ||||
|  | ||||
| #define	NS_RFRAGS(_slot)	( ((_slot)->flags >> 8) & 0xff) | ||||
| 	/* | ||||
| 	 * (VALE rx rings only) the high 8 bits | ||||
| 	 *  are the number of fragments. | ||||
| 	 */ | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * struct netmap_ring | ||||
|  * | ||||
|  * Netmap representation of a TX or RX ring (also known as "queue"). | ||||
|  * This is a queue implemented as a fixed-size circular array. | ||||
|  * At the software level the important fields are: head, cur, tail. | ||||
|  * | ||||
|  * In TX rings: | ||||
|  * | ||||
|  *	head	first slot available for transmission. | ||||
|  *	cur	wakeup point. select() and poll() will unblock | ||||
|  *		when 'tail' moves past 'cur' | ||||
|  *	tail	(readonly) first slot reserved to the kernel | ||||
|  * | ||||
|  *	[head .. tail-1] can be used for new packets to send; | ||||
|  *	'head' and 'cur' must be incremented as slots are filled | ||||
|  *	    with new packets to be sent; | ||||
|  *	'cur' can be moved further ahead if we need more space | ||||
|  *	for new transmissions. | ||||
|  * | ||||
|  * In RX rings: | ||||
|  * | ||||
|  *	head	first valid received packet | ||||
|  *	cur	wakeup point. select() and poll() will unblock | ||||
|  *		when 'tail' moves past 'cur' | ||||
|  *	tail	(readonly) first slot reserved to the kernel | ||||
|  * | ||||
|  *	[head .. tail-1] contain received packets; | ||||
|  *	'head' and 'cur' must be incremented as slots are consumed | ||||
|  *		and can be returned to the kernel; | ||||
|  *	'cur' can be moved further ahead if we want to wait for | ||||
|  *		new packets without returning the previous ones. | ||||
|  * | ||||
|  * DATA OWNERSHIP/LOCKING: | ||||
|  *	The netmap_ring, and all slots and buffers in the range | ||||
|  *	[head .. tail-1] are owned by the user program; | ||||
|  *	the kernel only accesses them during a netmap system call | ||||
|  *	and in the user thread context. | ||||
|  * | ||||
|  *	Other slots and buffers are reserved for use by the kernel | ||||
|  */ | ||||
| struct netmap_ring { | ||||
| 	/* | ||||
| 	 * buf_ofs is meant to be used through macros. | ||||
| 	 * It contains the offset of the buffer region from this | ||||
| 	 * descriptor. | ||||
| 	 */ | ||||
| 	const int64_t	buf_ofs; | ||||
| 	const uint32_t	num_slots;	/* number of slots in the ring. */ | ||||
| 	const uint32_t	nr_buf_size;	/* size of each buffer (stride from buf_ofs) */ | ||||
| 	const uint16_t	ringid; | ||||
| 	const uint16_t	dir;		/* 0: tx, 1: rx */ | ||||
|  | ||||
| 	/* | ||||
| 	 * Ring indexes. NOTE(review): (u) and (k) appear to mark fields | ||||
| 	 * advanced by the user program and by the kernel respectively. | ||||
| 	 */ | ||||
| 	uint32_t        head;		/* (u) first user slot */ | ||||
| 	uint32_t        cur;		/* (u) wakeup point */ | ||||
| 	uint32_t	tail;		/* (k) first kernel slot */ | ||||
|  | ||||
| 	uint32_t	flags;		/* NR_* ring flags (NR_TIMESTAMP, NR_FORWARD) */ | ||||
|  | ||||
| 	struct timeval	ts;		/* (k) time of last *sync() */ | ||||
|  | ||||
| 	/* opaque room for a mutex or similar object */ | ||||
| 	uint8_t		sem[128] __attribute__((__aligned__(NM_CACHE_ALIGN))); | ||||
|  | ||||
| 	/* the slots follow. This struct has variable size */ | ||||
| 	struct netmap_slot slot[0];	/* array of num_slots slots. */ | ||||
| }; | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * RING FLAGS | ||||
|  */ | ||||
| #define	NR_TIMESTAMP	0x0002		/* set timestamp on *sync() */ | ||||
| 	/* | ||||
| 	 * updates the 'ts' field on each netmap syscall. This saves | ||||
| 	 * a separate gettimeofday(), and is not much worse than | ||||
| 	 * software timestamps generated in the interrupt handler. | ||||
| 	 */ | ||||
|  | ||||
| #define	NR_FORWARD	0x0004		/* enable NS_FORWARD for ring */ | ||||
|  	/* | ||||
| 	 * Enables the NS_FORWARD slot flag for the ring. | ||||
| 	 */ | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Netmap representation of an interface and its queue(s). | ||||
|  * This is initialized by the kernel when binding a file | ||||
|  * descriptor to a port, and should be considered as readonly | ||||
|  * by user programs. The kernel never uses it. | ||||
|  * | ||||
|  * There is one netmap_if for each file descriptor on which we want | ||||
|  * to select/poll. | ||||
|  * select/poll operates on one or all pairs depending on the value of | ||||
|  * nr_ringid passed on the ioctl. | ||||
|  */ | ||||
| struct netmap_if { | ||||
| 	char		ni_name[IFNAMSIZ]; /* name of the interface. */ | ||||
| 	const uint32_t	ni_version;	/* API version, currently unused */ | ||||
| 	const uint32_t	ni_flags;	/* properties */ | ||||
| #define	NI_PRIV_MEM	0x1		/* private memory region */ | ||||
|  | ||||
| 	/* | ||||
| 	 * The number of packet rings available in netmap mode. | ||||
| 	 * Physical NICs can have different numbers of tx and rx rings. | ||||
| 	 * Physical NICs also have a 'host' ring pair. | ||||
| 	 * Additionally, clients can request additional ring pairs to | ||||
| 	 * be used for internal communication. | ||||
| 	 */ | ||||
| 	const uint32_t	ni_tx_rings;	/* number of HW tx rings */ | ||||
| 	const uint32_t	ni_rx_rings;	/* number of HW rx rings */ | ||||
|  | ||||
| 	uint32_t	ni_bufs_head;	/* head index for extra bufs */ | ||||
| 	uint32_t	ni_spare1[5];	/* spare space, not used here */ | ||||
| 	/* | ||||
| 	 * The following array contains the offset of each netmap ring | ||||
| 	 * from this structure, in the following order: | ||||
| 	 * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; | ||||
| 	 * NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings. | ||||
| 	 * | ||||
| 	 * The area is filled up by the kernel on NIOCREGIF, | ||||
| 	 * and then only read by userspace code. | ||||
| 	 */ | ||||
| 	const ssize_t	ring_ofs[0]; | ||||
| }; | ||||
|  | ||||
|  | ||||
| #ifndef NIOCREGIF | ||||
| /* | ||||
|  * ioctl names and related fields | ||||
|  * | ||||
|  * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, | ||||
|  *	whose identity is set in NIOCREGIF through nr_ringid. | ||||
|  *	These are non blocking and take no argument. | ||||
|  * | ||||
|  * NIOCGINFO takes a struct ifreq, the interface name is the input, | ||||
|  *	the outputs are number of queues and number of descriptor | ||||
|  *	for each queue (useful to set number of threads etc.). | ||||
|  *	The info returned is only advisory and may change before | ||||
|  *	the interface is bound to a file descriptor. | ||||
|  * | ||||
|  * NIOCREGIF takes an interface name within a struct nmreq, | ||||
|  *	and activates netmap mode on the interface (if possible). | ||||
|  * | ||||
|  * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we | ||||
|  * can pass it down to other NIC-related ioctls. | ||||
|  * | ||||
|  * The actual argument (struct nmreq) has a number of options to request | ||||
|  * different functions. | ||||
|  * The following are used in NIOCREGIF when nr_cmd == 0: | ||||
|  * | ||||
|  * nr_name	(in) | ||||
|  *	The name of the port (em0, valeXXX:YYY, etc.) | ||||
|  *	limited to IFNAMSIZ for backward compatibility. | ||||
|  * | ||||
|  * nr_version	(in/out) | ||||
|  *	Must match NETMAP_API as used in the kernel, error otherwise. | ||||
|  *	Always returns the desired value on output. | ||||
|  * | ||||
|  * nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out) | ||||
|  *	On input, non-zero values may be used to reconfigure the port | ||||
|  *	according to the requested values, but this is not guaranteed. | ||||
|  *	On output the actual values in use are reported. | ||||
|  * | ||||
|  * nr_ringid (in) | ||||
|  *	Indicates how rings should be bound to the file descriptors. | ||||
|  *	If nr_flags != 0, then the low bits (in NETMAP_RING_MASK) | ||||
|  *	are used to indicate the ring number, and nr_flags specifies | ||||
|  *	the actual rings to bind. NETMAP_NO_TX_POLL is unaffected. | ||||
|  * | ||||
|  *	NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: | ||||
|  *	If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control | ||||
|  *	the binding as follows: | ||||
|  *	0 (default)			binds all physical rings | ||||
|  *	NETMAP_HW_RING | ring number	binds a single ring pair | ||||
|  *	NETMAP_SW_RING			binds only the host tx/rx rings | ||||
|  * | ||||
|  *	NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push | ||||
|  *		packets on tx rings only if POLLOUT is set. | ||||
|  *		The default is to push any pending packet. | ||||
|  * | ||||
|  *	NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release | ||||
|  *		packets on rx rings also when POLLIN is NOT set. | ||||
|  *		The default is to touch the rx ring only with POLLIN. | ||||
|  *		Note that this is the opposite of TX because it | ||||
|  *		reflects the common usage. | ||||
|  * | ||||
|  *	NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. | ||||
|  *	NETMAP_PRIV_MEM is set on return for ports that do not use | ||||
|  *		the global memory allocator. | ||||
|  *		This information is not significant and applications | ||||
|  *		should look at the region id in nr_arg2 | ||||
|  * | ||||
|  * nr_flags	is the recommended mode to indicate which rings should | ||||
|  *		be bound to a file descriptor. Values are NR_REG_* | ||||
|  * | ||||
|  * nr_arg1 (in)	The number of extra rings to be reserved. | ||||
|  *		Especially when allocating a VALE port the system only | ||||
|  *		allocates the amount of memory needed for the port. | ||||
|  *		If more shared memory rings are desired (e.g. for pipes), | ||||
|  *		the first invocation for the same basename/allocator | ||||
|  *		should specify a suitable number. Memory cannot be | ||||
|  *		extended after the first allocation without closing | ||||
|  *		all ports on the same region. | ||||
|  * | ||||
|  * nr_arg2 (in/out) The identity of the memory region used. | ||||
|  *		On input, 0 means the system decides autonomously, | ||||
|  *		other values may try to select a specific region. | ||||
|  *		On return the actual value is reported. | ||||
|  *		Region '1' is the global allocator, normally shared | ||||
|  *		by all interfaces. Other values are private regions. | ||||
|  *		If two ports share the same region, zero-copy is possible. | ||||
|  * | ||||
|  * nr_arg3 (in/out)	number of extra buffers to be allocated. | ||||
|  * | ||||
|  * | ||||
|  * | ||||
|  * nr_cmd (in)	if non-zero indicates a special command: | ||||
|  *	NETMAP_BDG_ATTACH	 and nr_name = vale*:ifname | ||||
|  *		attaches the NIC to the switch; nr_ringid specifies | ||||
|  *		which rings to use. Used by vale-ctl -a ... | ||||
|  *	    nr_arg1 = NETMAP_BDG_HOST also attaches the host port | ||||
|  *		as in vale-ctl -h ... | ||||
|  * | ||||
|  *	NETMAP_BDG_DETACH	and nr_name = vale*:ifname | ||||
|  *		disconnects a previously attached NIC. | ||||
|  *		Used by vale-ctl -d ... | ||||
|  * | ||||
|  *	NETMAP_BDG_LIST | ||||
|  *		list the configuration of VALE switches. | ||||
|  * | ||||
|  *	NETMAP_BDG_VNET_HDR | ||||
|  *		Set the virtio-net header length used by the client | ||||
|  *		of a VALE switch port. | ||||
|  * | ||||
|  * nr_arg1, nr_arg2, nr_arg3  (in/out)		command specific | ||||
|  * | ||||
|  * | ||||
|  * | ||||
|  */ | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * struct nmreq overlays a struct ifreq (just the name) | ||||
|  * | ||||
|  * On input, nr_ringid indicates which rings we are requesting, | ||||
|  * with the low flags for the specific ring number. | ||||
|  * selection			FLAGS	RING INDEX | ||||
|  * | ||||
|  *	all the NIC rings	0x0000	- | ||||
|  *	only HOST ring		0x2000	- | ||||
|  *	single NIC ring		0x4000	ring index | ||||
|  *	all the NIC+HOST rings	0x6000	- | ||||
|  *	one pipe ring, master	0x8000	ring index | ||||
|  *	*** INVALID		0xA000 | ||||
|  *	one pipe ring, slave	0xC000	ring index | ||||
|  *	*** INVALID		0xE000 | ||||
|  * | ||||
|  */ | ||||
| struct nmreq { | ||||
| 	char		nr_name[IFNAMSIZ];	/* port name (em0, valeXXX:YYY, ...) */ | ||||
| 	uint32_t	nr_version;	/* API version */ | ||||
| 	uint32_t	nr_offset;	/* nifp offset in the shared region */ | ||||
| 	uint32_t	nr_memsize;	/* size of the shared region */ | ||||
| 	uint32_t	nr_tx_slots;	/* slots in tx rings */ | ||||
| 	uint32_t	nr_rx_slots;	/* slots in rx rings */ | ||||
| 	uint16_t	nr_tx_rings;	/* number of tx rings */ | ||||
| 	uint16_t	nr_rx_rings;	/* number of rx rings */ | ||||
|  | ||||
| 	uint16_t	nr_ringid;	/* ring(s) we care about */ | ||||
| #define NETMAP_HW_RING		0x4000	/* single NIC ring pair */ | ||||
| #define NETMAP_SW_RING		0x2000	/* only host ring pair */ | ||||
|  | ||||
| #define NETMAP_RING_MASK	0x0fff	/* the ring number */ | ||||
|  | ||||
| #define NETMAP_NO_TX_POLL	0x1000	/* no automatic txsync on poll */ | ||||
|  | ||||
| #define NETMAP_DO_RX_POLL	0x8000	/* DO automatic rxsync on poll */ | ||||
|  | ||||
| 	uint16_t	nr_cmd;		/* 0 for a plain NIOCREGIF, else a NETMAP_BDG_* command */ | ||||
| #define NETMAP_BDG_ATTACH	1	/* attach the NIC */ | ||||
| #define NETMAP_BDG_DETACH	2	/* detach the NIC */ | ||||
| #define NETMAP_BDG_LOOKUP_REG	3	/* register lookup function */ | ||||
| #define NETMAP_BDG_LIST		4	/* get bridge's info */ | ||||
| #define NETMAP_BDG_VNET_HDR     5       /* set the port virtio-net-hdr length */ | ||||
| #define NETMAP_BDG_OFFSET	NETMAP_BDG_VNET_HDR	/* deprecated alias */ | ||||
|  | ||||
| 	uint16_t	nr_arg1;	/* reserve extra rings in NIOCREGIF */ | ||||
| #define NETMAP_BDG_HOST		1	/* attach the host stack on ATTACH */ | ||||
|  | ||||
| 	uint16_t	nr_arg2;	/* id of the memory region in NIOCREGIF */ | ||||
| 	uint32_t	nr_arg3;	/* req. extra buffers in NIOCREGIF */ | ||||
| 	uint32_t	nr_flags; | ||||
| 	/* various modes, extends nr_ringid */ | ||||
| 	uint32_t	spare2[1];	/* spare, eases compatibility across versions */ | ||||
| }; | ||||
|  | ||||
| #define NR_REG_MASK		0xf /* values for nr_flags */ | ||||
| /* | ||||
|  * Ring selection modes for nr_flags. | ||||
|  * NOTE(review): the per-value descriptions follow the "netmap:foo..." | ||||
|  * naming scheme documented for NIOCREGIF; confirm against the kernel. | ||||
|  */ | ||||
| enum {	NR_REG_DEFAULT	= 0,	/* backward compat, should not be used. */ | ||||
| 	NR_REG_ALL_NIC	= 1,	/* all NIC ring pairs */ | ||||
| 	NR_REG_SW	= 2,	/* only the host ring pair */ | ||||
| 	NR_REG_NIC_SW	= 3,	/* all NIC ring pairs + host ring pair */ | ||||
| 	NR_REG_ONE_NIC	= 4,	/* a single NIC ring pair */ | ||||
| 	NR_REG_PIPE_MASTER = 5,	/* PIPE ring pair, master side */ | ||||
| 	NR_REG_PIPE_SLAVE = 6,	/* PIPE ring pair, slave side */ | ||||
| }; | ||||
| /* monitor uses the NR_REG to select the rings to monitor */ | ||||
| #define NR_MONITOR_TX	0x100 | ||||
| #define NR_MONITOR_RX	0x200 | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * FreeBSD uses the size value embedded in the _IOWR to determine | ||||
|  * how much to copy in/out. So we need it to match the actual | ||||
|  * data structure we pass. We put some spares in the structure | ||||
|  * to ease compatibility with other versions | ||||
|  */ | ||||
| #define NIOCGINFO	_IOWR('i', 145, struct nmreq) /* return IF info */ | ||||
| #define NIOCREGIF	_IOWR('i', 146, struct nmreq) /* interface register */ | ||||
| #define NIOCTXSYNC	_IO('i', 148) /* sync tx queues */ | ||||
| #define NIOCRXSYNC	_IO('i', 149) /* sync rx queues */ | ||||
| #endif /* !NIOCREGIF */ | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Helper functions for kernel and userspace | ||||
|  */ | ||||
|  | ||||
| /* | ||||
|  * Return non-zero when 'cur' has caught up with 'tail', i.e. no slots | ||||
|  * are left between the wakeup point and the first kernel-owned slot. | ||||
|  */ | ||||
| static inline int | ||||
| nm_ring_empty(struct netmap_ring *ring) | ||||
| { | ||||
| 	const uint32_t wakeup_point = ring->cur; | ||||
|  | ||||
| 	return (ring->tail == wakeup_point); | ||||
| } | ||||
|  | ||||
| #endif /* _NET_NETMAP_H_ */ | ||||
							
								
								
									
										677
									
								
								netmap/sys/net/netmap_user.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										677
									
								
								netmap/sys/net/netmap_user.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,677 @@ | ||||
| /* | ||||
|  * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. | ||||
|  * | ||||
|  * Redistribution and use in source and binary forms, with or without | ||||
|  * modification, are permitted provided that the following conditions | ||||
|  * are met: | ||||
|  * | ||||
|  *   1. Redistributions of source code must retain the above copyright | ||||
|  *      notice, this list of conditions and the following disclaimer. | ||||
|  *   2. Redistributions in binary form must reproduce the above copyright | ||||
|  *      notice, this list of conditions and the following disclaimer in the | ||||
|  *      documentation and/or other materials provided with the distribution. | ||||
|  * | ||||
|  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | ||||
|  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
|  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
|  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||||
|  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||
|  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||||
|  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||||
|  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||||
|  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||||
|  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||||
|  * SUCH DAMAGE. | ||||
|  */ | ||||
|  | ||||
| /* | ||||
|  * $FreeBSD$ | ||||
|  * | ||||
|  * Functions and macros to manipulate netmap structures and packets | ||||
|  * in userspace. See netmap(4) for more information. | ||||
|  * | ||||
|  * The address of the struct netmap_if, say nifp, is computed from the | ||||
|  * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region: | ||||
|  *	ioctl(fd, NIOCREGIF, &req); | ||||
|  *	mem = mmap(0, ... ); | ||||
|  *	nifp = NETMAP_IF(mem, req.nr_offset); | ||||
|  *		(so simple, we could just do it manually) | ||||
|  * | ||||
|  * From there: | ||||
|  *	struct netmap_ring *NETMAP_TXRING(nifp, index) | ||||
|  *	struct netmap_ring *NETMAP_RXRING(nifp, index) | ||||
|  *		we can access ring->head, ring->cur, ring->tail, ring->flags | ||||
|  * | ||||
|  *	ring->slot[i] gives us the i-th slot (we can access | ||||
|  *		directly len, flags, buf_idx) | ||||
|  * | ||||
|  *	char *buf = NETMAP_BUF(ring, x) returns a pointer to | ||||
|  *		the buffer numbered x | ||||
|  * | ||||
|  * All ring indexes (head, cur, tail) should always move forward. | ||||
|  * To compute the next index in a circular ring you can use | ||||
|  *	i = nm_ring_next(ring, i); | ||||
|  * | ||||
|  * To ease porting apps from pcap to netmap we supply a few functions | ||||
|  * that can be called to open, close, read and write on netmap in a way | ||||
|  * similar to libpcap. Note that the read/write function depend on | ||||
|  * an ioctl()/select()/poll() being issued to refill rings or push | ||||
|  * packets out. | ||||
|  * | ||||
|  * In order to use these, include #define NETMAP_WITH_LIBS | ||||
|  * in the source file that invokes these functions. | ||||
|  */ | ||||
|  | ||||
| #ifndef _NET_NETMAP_USER_H_ | ||||
| #define _NET_NETMAP_USER_H_ | ||||
|  | ||||
| #include <stdint.h> | ||||
| #include <sys/socket.h>		/* apple needs sockaddr */ | ||||
| #include <net/if.h>		/* IFNAMSIZ */ | ||||
|  | ||||
| #ifndef likely | ||||
| #define likely(x)	__builtin_expect(!!(x), 1) | ||||
| #define unlikely(x)	__builtin_expect(!!(x), 0) | ||||
| #endif /* likely and unlikely */ | ||||
|  | ||||
| #include <net/netmap.h> | ||||
|  | ||||
| /* helper macro: the address 'offset' bytes past 'ptr', cast to 'type' */ | ||||
| #define _NETMAP_OFFSET(type, ptr, offset) \ | ||||
| 	((type)(void *)((char *)(ptr) + (offset))) | ||||
|  | ||||
| /* the struct netmap_if located _ofs bytes past _base */ | ||||
| #define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs) | ||||
|  | ||||
| /* tx ring 'index' of nifp; ring_ofs[] holds offsets relative to nifp */ | ||||
| #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ | ||||
| 	nifp, (nifp)->ring_ofs[(index)] ) | ||||
|  | ||||
| /* | ||||
|  * rx ring 'index' of nifp: skips the ni_tx_rings + 1 leading tx entries | ||||
|  * of ring_ofs[]. 'index' is parenthesized so that argument expressions | ||||
|  * with low-precedence operators (e.g. i & 1) are not mis-parsed. | ||||
|  */ | ||||
| #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *,	\ | ||||
| 	nifp, (nifp)->ring_ofs[(index) + (nifp)->ni_tx_rings + 1] ) | ||||
|  | ||||
| /* | ||||
|  * Address of buffer number 'index'. The product is computed in 64 bits | ||||
|  * so that index * nr_buf_size cannot wrap at 32 bits before it is added | ||||
|  * to the pointer. | ||||
|  */ | ||||
| #define NETMAP_BUF(ring, index)				\ | ||||
| 	((char *)(ring) + (ring)->buf_ofs + ((uint64_t)(index)*(ring)->nr_buf_size)) | ||||
|  | ||||
| /* inverse of NETMAP_BUF: number of the buffer that starts at 'buf' */ | ||||
| #define NETMAP_BUF_IDX(ring, buf)			\ | ||||
| 	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ | ||||
| 		(ring)->nr_buf_size ) | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Index following 'i' in ring 'r'; yields 0 when i + 1 equals the | ||||
|  * number of slots. | ||||
|  */ | ||||
| static inline uint32_t | ||||
| nm_ring_next(struct netmap_ring *r, uint32_t i) | ||||
| { | ||||
| 	const uint32_t succ = i + 1; | ||||
|  | ||||
| 	if (unlikely(succ == r->num_slots)) | ||||
| 		return 0; | ||||
| 	return succ; | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Tell whether the tx ring still has transmissions in flight. | ||||
|  * Returns 0 only when the slot following 'tail' is 'head', | ||||
|  * i.e. head == tail + 1 (modulo ring size); 1 otherwise. | ||||
|  */ | ||||
| static inline int | ||||
| nm_tx_pending(struct netmap_ring *r) | ||||
| { | ||||
| 	const uint32_t after_tail = nm_ring_next(r, r->tail); | ||||
|  | ||||
| 	return after_tail != r->head; | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Number of slots from 'cur' up to (not including) 'tail', taking the | ||||
|  * wrap-around of the circular ring into account. | ||||
|  */ | ||||
| static inline uint32_t | ||||
| nm_ring_space(struct netmap_ring *ring) | ||||
| { | ||||
| 	int avail = ring->tail - ring->cur; | ||||
|  | ||||
| 	/* a negative difference means 'tail' has wrapped behind 'cur' */ | ||||
| 	return (avail < 0) ? avail + ring->num_slots : avail; | ||||
| } | ||||
|  | ||||
|  | ||||
| #ifdef NETMAP_WITH_LIBS | ||||
| /* | ||||
|  * Support for simple I/O libraries. | ||||
|  * Include other system headers required for compiling this. | ||||
|  */ | ||||
|  | ||||
| #ifndef HAVE_NETMAP_WITH_LIBS | ||||
| #define HAVE_NETMAP_WITH_LIBS | ||||
|  | ||||
| #include <sys/time.h> | ||||
| #include <sys/mman.h> | ||||
| #include <string.h>	/* memset */ | ||||
| #include <sys/ioctl.h> | ||||
| #include <sys/errno.h>	/* EINVAL */ | ||||
| #include <fcntl.h>	/* O_RDWR */ | ||||
| #include <unistd.h>	/* close() */ | ||||
| #include <signal.h> | ||||
| #include <stdlib.h> | ||||
|  | ||||
#ifndef ND /* debug macros */
/*
 * Debug support.
 *
 * ND(fmt, ...)	expands to nothing; its arguments are NOT evaluated.
 * D(fmt, ...)	prints "sec.usec function [line] message\n" on stderr,
 *		where sec is the current time modulo 1000 seconds.
 *
 * The locals declared by the macros use an nm_ prefix and a trailing
 * underscore so they neither use reserved (__xxx) identifiers nor
 * shadow a variable that the caller passes as an argument.
 */
#define ND(_fmt, ...) do {} while(0)
#define D(_fmt, ...)						\
	do {							\
		struct timeval nm_d_now_;			\
		gettimeofday(&nm_d_now_, NULL);			\
		fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n",	\
		    (int)(nm_d_now_.tv_sec % 1000),		\
		    (int)nm_d_now_.tv_usec,			\
		    __FUNCTION__, __LINE__, ##__VA_ARGS__);	\
	} while (0)

/*
 * Rate limited version of "D", lps indicates how many per second.
 * Each expansion site has its own counter, reset whenever the wall
 * clock second changes. The second is stored as a time_t so the
 * comparison with tv_sec is not affected by truncation, and lps is
 * parenthesized so that any expression can be passed.
 * The message arguments are evaluated only when the line is printed.
 */
#define RD(lps, format, ...)					\
	do {							\
		static time_t nm_rd_sec_;			\
		static int nm_rd_cnt_;				\
		struct timeval nm_rd_now_;			\
		gettimeofday(&nm_rd_now_, NULL);		\
		if (nm_rd_sec_ != nm_rd_now_.tv_sec) {		\
			nm_rd_sec_ = nm_rd_now_.tv_sec;		\
			nm_rd_cnt_ = 0;				\
		}						\
		if (nm_rd_cnt_++ < (lps)) {			\
			D(format, ##__VA_ARGS__);		\
		}						\
	} while (0)
#endif
|  | ||||
/*
 * Per-packet header handed to callers together with the payload.
 * Same as pcap_pkthdr.
 */
struct nm_pkthdr {
	struct timeval	ts;	/* timestamp */
	uint32_t	caplen;	/* captured length */
	uint32_t	len;	/* packet length */
};
|  | ||||
| struct nm_stat {	/* same as pcap_stat	*/ | ||||
| 	u_int	ps_recv; | ||||
| 	u_int	ps_drop; | ||||
| 	u_int	ps_ifdrop; | ||||
| #ifdef WIN32 | ||||
| 	u_int	bs_capt; | ||||
| #endif /* WIN32 */ | ||||
| }; | ||||
|  | ||||
| #define NM_ERRBUF_SIZE	512 | ||||
|  | ||||
| struct nm_desc { | ||||
| 	struct nm_desc *self; /* point to self if netmap. */ | ||||
| 	int fd; | ||||
| 	void *mem; | ||||
| 	int memsize; | ||||
| 	int done_mmap;	/* set if mem is the result of mmap */ | ||||
| 	struct netmap_if * const nifp; | ||||
| 	uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; | ||||
| 	uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; | ||||
| 	struct nmreq req;	/* also contains the nr_name = ifname */ | ||||
| 	struct nm_pkthdr hdr; | ||||
|  | ||||
| 	/* | ||||
| 	 * The memory contains netmap_if, rings and then buffers. | ||||
| 	 * Given a pointer (e.g. to nm_inject) we can compare with | ||||
| 	 * mem/buf_start/buf_end to tell if it is a buffer or | ||||
| 	 * some other descriptor in our region. | ||||
| 	 * We also store a pointer to some ring as it helps in the | ||||
| 	 * translation from buffer indexes to addresses. | ||||
| 	 */ | ||||
| 	struct netmap_ring * const some_ring; | ||||
| 	void * const buf_start; | ||||
| 	void * const buf_end; | ||||
| 	/* parameters from pcap_open_live */ | ||||
| 	int snaplen; | ||||
| 	int promisc; | ||||
| 	int to_ms; | ||||
| 	char *errbuf; | ||||
|  | ||||
| 	/* save flags so we can restore them on close */ | ||||
| 	uint32_t if_flags; | ||||
|         uint32_t if_reqcap; | ||||
|         uint32_t if_curcap; | ||||
|  | ||||
| 	struct nm_stat st; | ||||
| 	char msg[NM_ERRBUF_SIZE]; | ||||
| }; | ||||
|  | ||||
/*
 * Helpers to handle an opaque pointer as a netmap descriptor.
 *
 * P2NMD(p)		cast p to a struct nm_desc pointer.
 * IS_NETMAP_DESC(d)	true when d is non-NULL and d->self == d, which
 *			holds for a correctly opened descriptor.
 *			Eventually we should also use some magic number.
 *			Note that d is evaluated more than once.
 * NETMAP_FD(d)		file descriptor stored in the descriptor.
 */
#define P2NMD(p)		((struct nm_desc *)(p))
#define IS_NETMAP_DESC(d)	((d) != NULL && P2NMD(d) == P2NMD(d)->self)
#define NETMAP_FD(d)		(P2NMD(d)->fd)
|  | ||||
|  | ||||
/*
 * Slightly optimized packet copy. Lengths below 1024 bytes are
 * copied in whole 64-byte chunks (the length is effectively rounded
 * up to a multiple of 64), which is often faster than dealing with
 * odd sizes; longer packets are handed to memcpy().
 *
 * The caller must guarantee that both buffers have room for the
 * rounded-up length and that they do not overlap.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
nm_pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *from = (const uint64_t *)_src;
	uint64_t *to = (uint64_t *)_dst;
	int word;

	if (unlikely(l >= 1024)) {
		memcpy(to, from, l);
		return;
	}
	while (likely(l > 0)) {
		/* one chunk is eight 64-bit words, i.e. 64 bytes */
		for (word = 0; word < 8; word++)
			*to++ = *from++;
		l -= 64;
	}
}
|  | ||||
|  | ||||
| /* | ||||
|  * The callback, invoked on each received packet. Same as libpcap | ||||
|  */ | ||||
| typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); | ||||
|  | ||||
/*
 *--- the pcap-like API ---
 *
 * nm_open() opens a file descriptor, binds to a port and maps memory.
 *
 * ifname	(netmap:foo or vale:foo) is the port name;
 *		a suffix can indicate the following:
 *		^		bind the host (sw) ring pair
 *		*		bind host and NIC ring pairs (transparent)
 *		-NN		bind individual NIC ring pair
 *		{NN		bind master side of pipe NN
 *		}NN		bind slave side of pipe NN
 *
 * req		provides the initial values of nmreq before parsing ifname.
 *		Remember that the ifname parsing will override the ring
 *		number in nr_ringid, and nr_flags.
 * flags	special functions, normally 0;
 *		indicates which fields of *arg are significant.
 * arg		special functions, normally NULL;
 *		if passed a netmap_desc with mem != NULL,
 *		use that memory instead of mmap.
 */

static struct nm_desc *nm_open(const char *ifname,
	const struct nmreq *req, uint64_t flags,
	const struct nm_desc *arg);
|  | ||||
/*
 * nm_open() can import some fields from the parent descriptor;
 * the flags below select which ones.
 * The flags argument may also carry NETMAP_NO_TX_POLL and
 * NETMAP_DO_RX_POLL, which set the initial value for these flags.
 * Note that the 16 low bits of the flags are reserved for data
 * that may go into the nmreq.
 */
enum {
	NM_OPEN_NO_MMAP =	1 << 18, /* 0x040000 reuse mmap from parent */
	NM_OPEN_IFNAME =	1 << 19, /* 0x080000 nr_name, nr_ringid, nr_flags */
	NM_OPEN_ARG1 =		1 << 20, /* 0x100000 */
	NM_OPEN_ARG2 =		1 << 21, /* 0x200000 */
	NM_OPEN_ARG3 =		1 << 22, /* 0x400000 */
	NM_OPEN_RING_CFG =	1 << 23, /* 0x800000 tx|rx rings|slots */
};
|  | ||||
|  | ||||
/*
 * nm_close()	closes the descriptor and restores the port to its
 *		previous state
 */

static int nm_close(struct nm_desc *d);
|  | ||||
| /* | ||||
|  * nm_inject() is the same as pcap_inject() | ||||
|  * nm_dispatch() is the same as pcap_dispatch() | ||||
|  * nm_nextpkt() is the same as pcap_next() | ||||
|  */ | ||||
|  | ||||
| static int nm_inject(struct nm_desc *, const void *, size_t); | ||||
| static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); | ||||
| static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Try to open, return descriptor if successful, NULL otherwise. | ||||
|  * An invalid netmap name will return errno = 0; | ||||
|  * You can pass a pointer to a pre-filled nm_desc to add special | ||||
|  * parameters. Flags is used as follows | ||||
|  * NM_OPEN_NO_MMAP	use the memory from arg, only | ||||
|  *			if the nr_arg2 (memory block) matches. | ||||
|  * NM_OPEN_ARG1		use req.nr_arg1 from arg | ||||
|  * NM_OPEN_ARG2		use req.nr_arg2 from arg | ||||
|  * NM_OPEN_RING_CFG	user ring config from arg | ||||
|  */ | ||||
| static struct nm_desc * | ||||
| nm_open(const char *ifname, const struct nmreq *req, | ||||
| 	uint64_t new_flags, const struct nm_desc *arg) | ||||
| { | ||||
| 	struct nm_desc *d = NULL; | ||||
| 	const struct nm_desc *parent = arg; | ||||
| 	u_int namelen; | ||||
| 	uint32_t nr_ringid = 0, nr_flags; | ||||
| 	const char *port = NULL; | ||||
| 	const char *errmsg = NULL; | ||||
|  | ||||
| 	if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { | ||||
| 		errno = 0; /* name not recognised, not an error */ | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	if (ifname[0] == 'n') | ||||
| 		ifname += 7; | ||||
| 	/* scan for a separator */ | ||||
| 	for (port = ifname; *port && !index("-*^{}", *port); port++) | ||||
| 		; | ||||
| 	namelen = port - ifname; | ||||
| 	if (namelen >= sizeof(d->req.nr_name)) { | ||||
| 		errmsg = "name too long"; | ||||
| 		goto fail; | ||||
| 	} | ||||
| 	switch (*port) { | ||||
| 	default:  /* '\0', no suffix */ | ||||
| 		nr_flags = NR_REG_ALL_NIC; | ||||
| 		break; | ||||
| 	case '-': /* one NIC */ | ||||
| 		nr_flags = NR_REG_ONE_NIC; | ||||
| 		nr_ringid = atoi(port + 1); | ||||
| 		break; | ||||
| 	case '*': /* NIC and SW, ignore port */ | ||||
| 		nr_flags = NR_REG_NIC_SW; | ||||
| 		if (port[1]) { | ||||
| 			errmsg = "invalid port for nic+sw"; | ||||
| 			goto fail; | ||||
| 		} | ||||
| 		break; | ||||
| 	case '^': /* only sw ring */ | ||||
| 		nr_flags = NR_REG_SW; | ||||
| 		if (port[1]) { | ||||
| 			errmsg = "invalid port for sw ring"; | ||||
| 			goto fail; | ||||
| 		} | ||||
| 		break; | ||||
| 	case '{': | ||||
| 		nr_flags = NR_REG_PIPE_MASTER; | ||||
| 		nr_ringid = atoi(port + 1); | ||||
| 		break; | ||||
| 	case '}': | ||||
| 		nr_flags = NR_REG_PIPE_SLAVE; | ||||
| 		nr_ringid = atoi(port + 1); | ||||
| 		break; | ||||
| 	} | ||||
|  | ||||
| 	if (nr_ringid >= NETMAP_RING_MASK) { | ||||
| 		errmsg = "invalid ringid"; | ||||
| 		goto fail; | ||||
| 	} | ||||
| 	/* add the *XPOLL flags */ | ||||
| 	nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); | ||||
|  | ||||
| 	d = (struct nm_desc *)calloc(1, sizeof(*d)); | ||||
| 	if (d == NULL) { | ||||
| 		errmsg = "nm_desc alloc failure"; | ||||
| 		errno = ENOMEM; | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	d->self = d;	/* set this early so nm_close() works */ | ||||
| 	d->fd = open("/dev/netmap", O_RDWR); | ||||
| 	if (d->fd < 0) { | ||||
| 		errmsg = "cannot open /dev/netmap"; | ||||
| 		goto fail; | ||||
| 	} | ||||
|  | ||||
| 	if (req) | ||||
| 		d->req = *req; | ||||
| 	d->req.nr_version = NETMAP_API; | ||||
| 	d->req.nr_ringid &= ~NETMAP_RING_MASK; | ||||
|  | ||||
| 	/* these fields are overridden by ifname and flags processing */ | ||||
| 	d->req.nr_ringid |= nr_ringid; | ||||
| 	d->req.nr_flags = nr_flags; | ||||
| 	memcpy(d->req.nr_name, ifname, namelen); | ||||
| 	d->req.nr_name[namelen] = '\0'; | ||||
| 	/* optionally import info from parent */ | ||||
| 	if (IS_NETMAP_DESC(parent) && new_flags) { | ||||
| 		if (new_flags & NM_OPEN_ARG1) | ||||
| 			D("overriding ARG1 %d", parent->req.nr_arg1); | ||||
| 		d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? | ||||
| 			parent->req.nr_arg1 : 4; | ||||
| 		if (new_flags & NM_OPEN_ARG2) | ||||
| 			D("overriding ARG2 %d", parent->req.nr_arg2); | ||||
| 		d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ? | ||||
| 			parent->req.nr_arg2 : 0; | ||||
| 		if (new_flags & NM_OPEN_ARG3) | ||||
| 			D("overriding ARG3 %d", parent->req.nr_arg3); | ||||
| 		d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? | ||||
| 			parent->req.nr_arg3 : 0; | ||||
| 		if (new_flags & NM_OPEN_RING_CFG) { | ||||
| 			D("overriding RING_CFG"); | ||||
| 			d->req.nr_tx_slots = parent->req.nr_tx_slots; | ||||
| 			d->req.nr_rx_slots = parent->req.nr_rx_slots; | ||||
| 			d->req.nr_tx_rings = parent->req.nr_tx_rings; | ||||
| 			d->req.nr_rx_rings = parent->req.nr_rx_rings; | ||||
| 		} | ||||
| 		if (new_flags & NM_OPEN_IFNAME) { | ||||
| 			D("overriding ifname %s ringid 0x%x flags 0x%x", | ||||
| 				parent->req.nr_name, parent->req.nr_ringid, | ||||
| 				parent->req.nr_flags); | ||||
| 			memcpy(d->req.nr_name, parent->req.nr_name, | ||||
| 				sizeof(d->req.nr_name)); | ||||
| 			d->req.nr_ringid = parent->req.nr_ringid; | ||||
| 			d->req.nr_flags = parent->req.nr_flags; | ||||
| 		} | ||||
| 	} | ||||
| 	if (ioctl(d->fd, NIOCREGIF, &d->req)) { | ||||
| 		errmsg = "NIOCREGIF failed"; | ||||
| 		goto fail; | ||||
| 	} | ||||
|  | ||||
| 	if (IS_NETMAP_DESC(parent) && parent->mem && | ||||
| 	    parent->req.nr_arg2 == d->req.nr_arg2) { | ||||
| 		/* do not mmap, inherit from parent */ | ||||
| 		d->memsize = parent->memsize; | ||||
| 		d->mem = parent->mem; | ||||
| 	} else { | ||||
| 		d->memsize = d->req.nr_memsize; | ||||
| 		d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, | ||||
| 				d->fd, 0); | ||||
| 		if (d->mem == NULL) { | ||||
| 			errmsg = "mmap failed"; | ||||
| 			goto fail; | ||||
| 		} | ||||
| 		d->done_mmap = 1; | ||||
| 	} | ||||
| 	{ | ||||
| 		struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); | ||||
| 		struct netmap_ring *r = NETMAP_RXRING(nifp, ); | ||||
|  | ||||
| 		*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; | ||||
| 		*(struct netmap_ring **)(uintptr_t)&d->some_ring = r; | ||||
| 		*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); | ||||
| 		*(void **)(uintptr_t)&d->buf_end = | ||||
| 			(char *)d->mem + d->memsize; | ||||
| 	} | ||||
|  | ||||
| 	if (nr_flags ==  NR_REG_SW) { /* host stack */ | ||||
| 		d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; | ||||
| 		d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; | ||||
| 	} else if (nr_flags ==  NR_REG_ALL_NIC) { /* only nic */ | ||||
| 		d->first_tx_ring = 0; | ||||
| 		d->first_rx_ring = 0; | ||||
| 		d->last_tx_ring = d->req.nr_tx_rings - 1; | ||||
| 		d->last_rx_ring = d->req.nr_rx_rings - 1; | ||||
| 	} else if (nr_flags ==  NR_REG_NIC_SW) { | ||||
| 		d->first_tx_ring = 0; | ||||
| 		d->first_rx_ring = 0; | ||||
| 		d->last_tx_ring = d->req.nr_tx_rings; | ||||
| 		d->last_rx_ring = d->req.nr_rx_rings; | ||||
| 	} else if (nr_flags == NR_REG_ONE_NIC) { | ||||
| 		/* XXX check validity */ | ||||
| 		d->first_tx_ring = d->last_tx_ring = | ||||
| 		d->first_rx_ring = d->last_rx_ring = nr_ringid; | ||||
| 	} else { /* pipes */ | ||||
| 		d->first_tx_ring = d->last_tx_ring = 0; | ||||
| 		d->first_rx_ring = d->last_rx_ring = 0; | ||||
| 	} | ||||
|  | ||||
| #ifdef DEBUG_NETMAP_USER | ||||
|     { /* debugging code */ | ||||
| 	int i; | ||||
|  | ||||
| 	D("%s tx %d .. %d %d rx %d .. %d %d", ifname, | ||||
| 		d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, | ||||
|                 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); | ||||
| 	for (i = 0; i <= d->req.nr_tx_rings; i++) { | ||||
| 		struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); | ||||
| 		D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); | ||||
| 	} | ||||
| 	for (i = 0; i <= d->req.nr_rx_rings; i++) { | ||||
| 		struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); | ||||
| 		D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); | ||||
| 	} | ||||
|     } | ||||
| #endif /* debugging */ | ||||
|  | ||||
| 	d->cur_tx_ring = d->first_tx_ring; | ||||
| 	d->cur_rx_ring = d->first_rx_ring; | ||||
| 	return d; | ||||
|  | ||||
| fail: | ||||
| 	nm_close(d); | ||||
| 	if (errmsg) | ||||
| 		D("%s %s", errmsg, ifname); | ||||
| 	errno = EINVAL; | ||||
| 	return NULL; | ||||
| } | ||||
|  | ||||
|  | ||||
| static int | ||||
| nm_close(struct nm_desc *d) | ||||
| { | ||||
| 	/* | ||||
| 	 * ugly trick to avoid unused warnings | ||||
| 	 */ | ||||
| 	static void *__xxzt[] __attribute__ ((unused))  = | ||||
| 		{ (void *)nm_open, (void *)nm_inject, | ||||
| 		  (void *)nm_dispatch, (void *)nm_nextpkt } ; | ||||
|  | ||||
| 	if (d == NULL || d->self != d) | ||||
| 		return EINVAL; | ||||
| 	if (d->done_mmap && d->mem) | ||||
| 		munmap(d->mem, d->memsize); | ||||
| 	if (d->fd != -1) | ||||
| 		close(d->fd); | ||||
| 	bzero(d, sizeof(*d)); | ||||
| 	free(d); | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Same prototype as pcap_inject(), only need to cast. | ||||
|  */ | ||||
| static int | ||||
| nm_inject(struct nm_desc *d, const void *buf, size_t size) | ||||
| { | ||||
| 	u_int c, n = d->last_tx_ring - d->first_tx_ring + 1; | ||||
|  | ||||
| 	for (c = 0; c < n ; c++) { | ||||
| 		/* compute current ring to use */ | ||||
| 		struct netmap_ring *ring; | ||||
| 		uint32_t i, idx; | ||||
| 		uint32_t ri = d->cur_tx_ring + c; | ||||
|  | ||||
| 		if (ri > d->last_tx_ring) | ||||
| 			ri = d->first_tx_ring; | ||||
| 		ring = NETMAP_TXRING(d->nifp, ri); | ||||
| 		if (nm_ring_empty(ring)) { | ||||
| 			continue; | ||||
| 		} | ||||
| 		i = ring->cur; | ||||
| 		idx = ring->slot[i].buf_idx; | ||||
| 		ring->slot[i].len = size; | ||||
| 		nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); | ||||
| 		d->cur_tx_ring = ri; | ||||
| 		ring->head = ring->cur = nm_ring_next(ring, i); | ||||
| 		return size; | ||||
| 	} | ||||
| 	return 0; /* fail */ | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Same prototype as pcap_dispatch(), only need to cast. | ||||
|  */ | ||||
| static int | ||||
| nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) | ||||
| { | ||||
| 	int n = d->last_rx_ring - d->first_rx_ring + 1; | ||||
| 	int c, got = 0, ri = d->cur_rx_ring; | ||||
|  | ||||
| 	if (cnt == 0) | ||||
| 		cnt = -1; | ||||
| 	/* cnt == -1 means infinite, but rings have a finite amount | ||||
| 	 * of buffers and the int is large enough that we never wrap, | ||||
| 	 * so we can omit checking for -1 | ||||
| 	 */ | ||||
| 	for (c=0; c < n && cnt != got; c++) { | ||||
| 		/* compute current ring to use */ | ||||
| 		struct netmap_ring *ring; | ||||
|  | ||||
| 		ri = d->cur_rx_ring + c; | ||||
| 		if (ri > d->last_rx_ring) | ||||
| 			ri = d->first_rx_ring; | ||||
| 		ring = NETMAP_RXRING(d->nifp, ri); | ||||
| 		for ( ; !nm_ring_empty(ring) && cnt != got; got++) { | ||||
| 			u_int i = ring->cur; | ||||
| 			u_int idx = ring->slot[i].buf_idx; | ||||
| 			u_char *buf = (u_char *)NETMAP_BUF(ring, idx); | ||||
|  | ||||
| 			// __builtin_prefetch(buf); | ||||
| 			d->hdr.len = d->hdr.caplen = ring->slot[i].len; | ||||
| 			d->hdr.ts = ring->ts; | ||||
| 			cb(arg, &d->hdr, buf); | ||||
| 			ring->head = ring->cur = nm_ring_next(ring, i); | ||||
| 		} | ||||
| 	} | ||||
| 	d->cur_rx_ring = ri; | ||||
| 	return got; | ||||
| } | ||||
|  | ||||
| static u_char * | ||||
| nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) | ||||
| { | ||||
| 	int ri = d->cur_rx_ring; | ||||
|  | ||||
| 	do { | ||||
| 		/* compute current ring to use */ | ||||
| 		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); | ||||
| 		if (!nm_ring_empty(ring)) { | ||||
| 			u_int i = ring->cur; | ||||
| 			u_int idx = ring->slot[i].buf_idx; | ||||
| 			u_char *buf = (u_char *)NETMAP_BUF(ring, idx); | ||||
|  | ||||
| 			// __builtin_prefetch(buf); | ||||
| 			hdr->ts = ring->ts; | ||||
| 			hdr->len = hdr->caplen = ring->slot[i].len; | ||||
| 			ring->cur = nm_ring_next(ring, i); | ||||
| 			/* we could postpone advancing head if we want | ||||
| 			 * to hold the buffer. This can be supported in | ||||
| 			 * the future. | ||||
| 			 */ | ||||
| 			ring->head = ring->cur; | ||||
| 			d->cur_rx_ring = ri; | ||||
| 			return buf; | ||||
| 		} | ||||
| 		ri++; | ||||
| 		if (ri > d->last_rx_ring) | ||||
| 			ri = d->first_rx_ring; | ||||
| 	} while (ri != d->cur_rx_ring); | ||||
| 	return NULL; /* nothing found */ | ||||
| } | ||||
|  | ||||
| #endif /* !HAVE_NETMAP_WITH_LIBS */ | ||||
|  | ||||
| #endif /* NETMAP_WITH_LIBS */ | ||||
|  | ||||
| #endif /* _NET_NETMAP_USER_H_ */ | ||||
		Reference in New Issue
	
	Block a user