- add netmap-libpcap
- add netmap (FreeBSD header files need to be updated with this)
- move prototype perl scripts to prototype/ folder
- create basic structure for sipcap app (no code yet)
netmap/sys/dev/netmap/if_em_netmap.h (new file, 334 lines)
@@ -0,0 +1,334 @@
/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 238985 2012-08-02 11:59:43Z luigi $
 *
 * netmap support for: em.
 *
 * For more details on netmap support please see ixgbe_netmap.h
 */


#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h>	/* vtophys ? */
#include <dev/netmap/netmap_kern.h>


// XXX do we need to block/unblock the tasks ?
static void
em_netmap_block_tasks(struct adapter *adapter)
{
	if (adapter->msix > 1) { /* MSIX */
		int i;
		struct tx_ring *txr = adapter->tx_rings;
		struct rx_ring *rxr = adapter->rx_rings;

		for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
			taskqueue_block(txr->tq);
			taskqueue_drain(txr->tq, &txr->tx_task);
			taskqueue_block(rxr->tq);
			taskqueue_drain(rxr->tq, &rxr->rx_task);
		}
	} else {	/* legacy */
		taskqueue_block(adapter->tq);
		taskqueue_drain(adapter->tq, &adapter->link_task);
		taskqueue_drain(adapter->tq, &adapter->que_task);
	}
}


static void
em_netmap_unblock_tasks(struct adapter *adapter)
{
	if (adapter->msix > 1) {
		struct tx_ring *txr = adapter->tx_rings;
		struct rx_ring *rxr = adapter->rx_rings;
		int i;

		/* advance txr/rxr each pass, mirroring block_tasks above */
		for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
			taskqueue_unblock(txr->tq);
			taskqueue_unblock(rxr->tq);
		}
	} else { /* legacy */
		taskqueue_unblock(adapter->tq);
	}
}


/*
 * Register/unregister. We are already under netmap lock.
 */
static int
em_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct adapter *adapter = ifp->if_softc;

	EM_CORE_LOCK(adapter);
	em_disable_intr(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	em_netmap_block_tasks(adapter);
	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	em_init_locked(adapter);	/* also enable intr */
	em_netmap_unblock_tasks(adapter);
	EM_CORE_UNLOCK(adapter);
	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}


/*
 * Reconcile kernel and user view of the transmit ring.
 */
static int
em_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/* generate an interrupt approximately every half ring */
	u_int report_frequency = kring->nkr_num_slots >> 1;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			/* device-specific */
			struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
			struct em_buffer *txbuf = &txr->tx_buffers[nic_i];
			int flags = (slot->flags & NS_REPORT ||
			    nic_i == 0 || nic_i == report_frequency) ?
			    E1000_TXD_CMD_RS : 0;

			NM_CHECK_ADDR_LEN(addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				curr->buffer_addr = htole64(paddr);
				/* buffer has changed, reload map */
				netmap_reload_map(txr->txtag, txbuf->map, addr);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

			/* Fill the slot in the NIC ring. */
			curr->upper.data = 0;
			curr->lower.data = htole32(adapter->txd_cmd | len |
			    (E1000_TXD_CMD_EOP | flags));
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_PREWRITE);

			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* record completed transmissions using TDH */
		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
			D("TDH wrap %d", nic_i);
			nic_i -= kring->nkr_num_slots;
		}
		if (nic_i != txr->next_to_clean) {
			txr->next_to_clean = nic_i;
			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
		}
	}

	nm_txsync_finalize(kring);

	return 0;
}


/*
 * Reconcile kernel and user view of the receive ring.
 */
static int
em_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];

	if (head > lim)
		return netmap_ring_reinit(kring);

	/* XXX check sync modes */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = rxr->next_to_check;
		nm_i = netmap_idx_n2k(kring, nic_i);

		for (n = 0; ; n++) { // XXX no need to count
			struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
			uint32_t staterr = le32toh(curr->status);

			if ((staterr & E1000_RXD_STAT_DD) == 0)
				break;
			ring->slot[nm_i].len = le16toh(curr->length);
			ring->slot[nm_i].flags = slot_flags;
			bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
			    BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			/* make sure next_to_refresh follows next_to_check */
			rxr->next_to_refresh = nic_i;	// XXX
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
			rxr->next_to_check = nic_i;
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
			struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i];

			if (addr == netmap_buffer_base) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				curr->buffer_addr = htole64(paddr);
				netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			curr->status = 0;
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_PREREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move nic_i back by one unit
		 */
		nic_i = nm_prev(nic_i, lim);
		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
	}

	/* tell userspace that there might be new packets */
	nm_rxsync_finalize(kring);

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


static void
em_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = em_netmap_txsync;
	na.nm_rxsync = em_netmap_rxsync;
	na.nm_register = em_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
	netmap_attach(&na);
}

/* end of file */
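
Editor's note: the txsync/rxsync routines above walk the netmap and NIC rings in lockstep with nm_next()/nm_prev(). A minimal userland sketch of that wraparound arithmetic, using stand-in definitions rather than the actual netmap_kern.h macros (an assumption, but semantically the same circular step):

#include <stdio.h>

/* Stand-ins for the kernel macros: 'lim' is nkr_num_slots - 1,
 * the last valid slot index of the ring. */
static unsigned nm_next(unsigned i, unsigned lim) { return (i == lim) ? 0 : i + 1; }
static unsigned nm_prev(unsigned i, unsigned lim) { return (i == 0) ? lim : i - 1; }

int main(void)
{
	unsigned lim = 7;		/* an 8-slot ring */
	unsigned nm_i = 6, head = 2, n;

	/* same loop shape as the "first part" of txsync: walk from
	 * nr_hwcur up to (but not including) head, wrapping at lim */
	for (n = 0; nm_i != head; n++) {
		printf("processing slot %u\n", nm_i);
		nm_i = nm_next(nm_i, lim);
	}
	printf("%u slots processed, prev of 0 is %u\n", n, nm_prev(0, lim));
	return 0;
}

Running this prints slots 6, 7, 0, 1 and then "4 slots processed, prev of 0 is 7", which is exactly the wrap behavior the drivers rely on when they move nic_i back by one unit before writing the RDT register.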
netmap/sys/dev/netmap/if_igb_netmap.h (new file, 314 lines)
@@ -0,0 +1,314 @@
/*
 * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: head/sys/dev/netmap/if_igb_netmap.h 256200 2013-10-09 17:32:52Z jfv $
 *
 * Netmap support for igb, partly contributed by Ahmed Kooli
 * For details on netmap support please see ixgbe_netmap.h
 */


#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h>	/* vtophys ? */
#include <dev/netmap/netmap_kern.h>

/*
 * Adaptation to different versions of the driver.
 */

#ifndef IGB_MEDIA_RESET
/* at the same time as IGB_MEDIA_RESET was defined, the
 * tx buffer descriptor was renamed, so use this to revert
 * back to the old name.
 */
#define igb_tx_buf igb_tx_buffer
#endif


/*
 * Register/unregister. We are already under netmap lock.
 */
static int
igb_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct adapter *adapter = ifp->if_softc;

	IGB_CORE_LOCK(adapter);
	igb_disable_intr(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	igb_init_locked(adapter);	/* also enable intr */
	IGB_CORE_UNLOCK(adapter);
	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}


/*
 * Reconcile kernel and user view of the transmit ring.
 */
static int
igb_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/* generate an interrupt approximately every half ring */
	u_int report_frequency = kring->nkr_num_slots >> 1;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
	/* 82575 needs the queue index added */
	u32 olinfo_status =
	    (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			/* device-specific */
			union e1000_adv_tx_desc *curr =
			    (union e1000_adv_tx_desc *)&txr->tx_base[nic_i];
			struct igb_tx_buf *txbuf = &txr->tx_buffers[nic_i];
			int flags = (slot->flags & NS_REPORT ||
			    nic_i == 0 || nic_i == report_frequency) ?
			    E1000_ADVTXD_DCMD_RS : 0;

			NM_CHECK_ADDR_LEN(addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(txr->txtag, txbuf->map, addr);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

			/* Fill the slot in the NIC ring. */
			curr->read.buffer_addr = htole64(paddr);
			// XXX check olinfo and cmd_type_len
			curr->read.olinfo_status =
			    htole32(olinfo_status |
				(len << E1000_ADVTXD_PAYLEN_SHIFT));
			curr->read.cmd_type_len =
			    htole32(len | E1000_ADVTXD_DTYP_DATA |
				E1000_ADVTXD_DCMD_IFCS |
				E1000_ADVTXD_DCMD_DEXT |
				E1000_ADVTXD_DCMD_EOP | flags);

			/* make sure changes to the buffer are synced */
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_PREWRITE);

			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		/* Set the watchdog XXX ? */
		txr->queue_status = IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;

		/* synchronize the NIC ring */
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* record completed transmissions using TDH */
		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
			D("TDH wrap %d", nic_i);
			nic_i -= kring->nkr_num_slots;
		}
		txr->next_to_clean = nic_i;
		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
	}

	nm_txsync_finalize(kring);

	return 0;
}


/*
 * Reconcile kernel and user view of the receive ring.
 */
static int
igb_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];

	if (head > lim)
		return netmap_ring_reinit(kring);

	/* XXX check sync modes */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = rxr->next_to_check;
		nm_i = netmap_idx_n2k(kring, nic_i);

		for (n = 0; ; n++) {
			union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
			uint32_t staterr = le32toh(curr->wb.upper.status_error);

			if ((staterr & E1000_RXD_STAT_DD) == 0)
				break;
			ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
			ring->slot[nm_i].flags = slot_flags;
			bus_dmamap_sync(rxr->ptag,
			    rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
			rxr->next_to_check = nic_i;
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
			struct igb_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];

			if (addr == netmap_buffer_base) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			curr->wb.upper.status_error = 0;
			curr->read.pkt_addr = htole64(paddr);
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move nic_i back by one unit
		 */
		nic_i = nm_prev(nic_i, lim);
		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
	}

	/* tell userspace that there might be new packets */
	nm_rxsync_finalize(kring);

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


static void
igb_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = igb_netmap_txsync;
	na.nm_rxsync = igb_netmap_rxsync;
	na.nm_register = igb_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
	netmap_attach(&na);
}

/* end of file */
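
Editor's note: the advanced TX descriptor fields that igb_netmap_txsync() fills (olinfo_status, cmd_type_len) are plain bitfield compositions. A hedged userland sketch with an illustrative shift value (the authoritative constants live in the e1000 headers, not here):

#include <stdint.h>
#include <stdio.h>

/* Assumed value for illustration only; see the e1000 headers for
 * the real E1000_ADVTXD_PAYLEN_SHIFT definition. */
#define ADVTXD_PAYLEN_SHIFT	14

int main(void)
{
	uint32_t queue = 2;	/* txr->me; only the 82575 wants it in olinfo */
	uint32_t len = 1514;	/* frame length in bytes */

	/* queue index in bits 7:4, payload length in the upper bits */
	uint32_t olinfo = (queue << 4) | (len << ADVTXD_PAYLEN_SHIFT);
	printf("olinfo_status = 0x%08x\n", olinfo);
	return 0;
}

The point of the conditional in the driver is that non-82575 parts leave the queue bits zero, so olinfo_status degenerates to just the shifted payload length.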
netmap/sys/dev/netmap/if_lem_netmap.h (new file, 311 lines)
@@ -0,0 +1,311 @@
/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * $FreeBSD: head/sys/dev/netmap/if_lem_netmap.h 231881 2012-02-17 14:09:04Z luigi $
 *
 * netmap support for: lem
 *
 * For details on netmap support please see ixgbe_netmap.h
 */


#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h>	/* vtophys ? */
#include <dev/netmap/netmap_kern.h>


/*
 * Register/unregister. We are already under netmap lock.
 */
static int
lem_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct adapter *adapter = ifp->if_softc;

	EM_CORE_LOCK(adapter);

	lem_disable_intr(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

#ifndef EM_LEGACY_IRQ // XXX do we need this ?
	taskqueue_block(adapter->tq);
	taskqueue_drain(adapter->tq, &adapter->rxtx_task);
	taskqueue_drain(adapter->tq, &adapter->link_task);
#endif /* !EM_LEGACY_IRQ */

	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	lem_init_locked(adapter);	/* also enable intr */

#ifndef EM_LEGACY_IRQ
	taskqueue_unblock(adapter->tq); // XXX do we need this ?
#endif /* !EM_LEGACY_IRQ */

	EM_CORE_UNLOCK(adapter);

	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}


/*
 * Reconcile kernel and user view of the transmit ring.
 */
static int
lem_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/* generate an interrupt approximately every half ring */
	u_int report_frequency = kring->nkr_num_slots >> 1;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;

	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			/* device-specific */
			struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i];
			struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i];
			int flags = (slot->flags & NS_REPORT ||
			    nic_i == 0 || nic_i == report_frequency) ?
			    E1000_TXD_CMD_RS : 0;

			NM_CHECK_ADDR_LEN(addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				curr->buffer_addr = htole64(paddr);
				netmap_reload_map(adapter->txtag, txbuf->map, addr);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

			/* Fill the slot in the NIC ring. */
			curr->upper.data = 0;
			curr->lower.data = htole32(adapter->txd_cmd | len |
			    (E1000_TXD_CMD_EOP | flags));
			bus_dmamap_sync(adapter->txtag, txbuf->map,
			    BUS_DMASYNC_PREWRITE);

			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		kring->last_reclaim = ticks;
		/* record completed transmissions using TDH */
		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
			D("TDH wrap %d", nic_i);
			nic_i -= kring->nkr_num_slots;
		}
		adapter->next_tx_to_clean = nic_i;
		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
	}

	nm_txsync_finalize(kring);

	return 0;
}


/*
 * Reconcile kernel and user view of the receive ring.
 */
static int
lem_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;

	if (head > lim)
		return netmap_ring_reinit(kring);

	/* XXX check sync modes */
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = adapter->next_rx_desc_to_check;
		nm_i = netmap_idx_n2k(kring, nic_i);

		for (n = 0; ; n++) {
			struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
			uint32_t staterr = le32toh(curr->status);
			int len;

			if ((staterr & E1000_RXD_STAT_DD) == 0)
				break;
			len = le16toh(curr->length) - 4; // CRC
			if (len < 0) {
				D("bogus pkt size %d nic idx %d", len, nic_i);
				len = 0;
			}
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			bus_dmamap_sync(adapter->rxtag,
			    adapter->rx_buffer_area[nic_i].map,
			    BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
			ND("%d new packets at nic %d nm %d tail %d",
				n,
				adapter->next_rx_desc_to_check,
				netmap_idx_n2k(kring, adapter->next_rx_desc_to_check),
				kring->nr_hwtail);
			adapter->next_rx_desc_to_check = nic_i;
			// ifp->if_ipackets += n;
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
			struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i];

			if (addr == netmap_buffer_base) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				curr->buffer_addr = htole64(paddr);
				netmap_reload_map(adapter->rxtag, rxbuf->map, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			curr->status = 0;
			bus_dmamap_sync(adapter->rxtag, rxbuf->map,
			    BUS_DMASYNC_PREREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move nic_i back by one unit
		 */
		nic_i = nm_prev(nic_i, lim);
		E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
	}

	/* tell userspace that there might be new packets */
	nm_rxsync_finalize(kring);

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


static void
lem_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = lem_netmap_txsync;
	na.nm_rxsync = lem_netmap_rxsync;
	na.nm_register = lem_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = 1;
	netmap_attach(&na);
}

/* end of file */
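
Editor's note: lem_netmap_rxsync() subtracts the 4-byte Ethernet CRC from the length the NIC reports and clamps bogus values to zero; the guard matters because the subtraction is done in a signed int. A minimal standalone sketch of that logic:

#include <stdint.h>
#include <stdio.h>

/* Same logic as the rxsync above: the NIC length includes the CRC,
 * which is not exposed through the netmap slot. */
static int
strip_crc(uint16_t nic_len)
{
	int len = (int)nic_len - 4;	/* CRC */

	if (len < 0) {	/* bogus packet size, clamp */
		len = 0;
	}
	return len;
}

int main(void)
{
	printf("%d %d\n", strip_crc(64), strip_crc(2));	/* prints: 60 0 */
	return 0;
}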
netmap/sys/dev/netmap/if_nfe_netmap.h (new file, 391 lines)
@@ -0,0 +1,391 @@
/*
 * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 231881 2012-02-17 14:09:04Z luigi $
 *
 * netmap support for: nfe XXX not yet tested.
 *
 * For more details on netmap support please see ixgbe_netmap.h
 */


#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <dev/netmap/netmap_kern.h>


static int
nfe_netmap_init_buffers(struct nfe_softc *sc)
{
	struct netmap_adapter *na = NA(sc->nfe_ifp);
	struct netmap_slot *slot;
	int i, l, n, max_avail;
	struct nfe_desc32 *desc32 = NULL;
	struct nfe_desc64 *desc64 = NULL;
	void *addr;
	uint64_t paddr;

	if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
		return 0;
	}

	slot = netmap_reset(na, NR_TX, 0, 0);
	if (!slot)
		return 0;	// XXX cannot happen
	// XXX init the tx ring
	n = NFE_TX_RING_COUNT;
	for (i = 0; i < n; i++) {
		l = netmap_idx_n2k(&na->tx_rings[0], i);
		addr = PNMB(slot + l, &paddr);
		netmap_reload_map(sc->txq.tx_data_tag,
		    sc->txq.data[l].tx_data_map, addr);
		slot[l].flags = 0;
		if (sc->nfe_flags & NFE_40BIT_ADDR) {
			desc64 = &sc->txq.desc64[l];
			desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
			desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
			desc64->vtag = 0;
			desc64->length = htole16(0);
			desc64->flags = htole16(0);
		} else {
			desc32 = &sc->txq.desc32[l];
			desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
			desc32->length = htole16(0);
			desc32->flags = htole16(0);
		}
	}

	slot = netmap_reset(na, NR_RX, 0, 0);
	// XXX init the rx ring
	/*
	 * preserve buffers still owned by the driver (and keep one empty).
	 */
	n = NFE_RX_RING_COUNT;
	max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
	for (i = 0; i < n; i++) {
		uint16_t flags;
		l = netmap_idx_n2k(&na->rx_rings[0], i);
		addr = PNMB(slot + l, &paddr);
		flags = (i < max_avail) ? NFE_RX_READY : 0;
		if (sc->nfe_flags & NFE_40BIT_ADDR) {
			desc64 = &sc->rxq.desc64[l];
			desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
			desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
			desc64->vtag = 0;
			desc64->length = htole16(NETMAP_BUF_SIZE);
			/* use the computed flags so only slots below
			 * max_avail are marked NIC-ready */
			desc64->flags = htole16(flags);
		} else {
			desc32 = &sc->rxq.desc32[l];
			desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
			desc32->length = htole16(NETMAP_BUF_SIZE);
			desc32->flags = htole16(flags);
		}

		netmap_reload_map(sc->rxq.rx_data_tag,
		    sc->rxq.data[l].rx_data_map, addr);
		bus_dmamap_sync(sc->rxq.rx_data_tag,
		    sc->rxq.data[l].rx_data_map, BUS_DMASYNC_PREREAD);
	}

	return 1;
}


/*
 * Register/unregister. We are already under netmap lock.
 */
static int
nfe_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct nfe_softc *sc = ifp->if_softc;

	NFE_LOCK(sc);
	nfe_stop(ifp);	/* also clear IFF_DRV_RUNNING */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	nfe_init_locked(sc);	/* also enable intr */
	NFE_UNLOCK(sc);
	return (0);
}


/*
 * Reconcile kernel and user view of the transmit ring.
 */
static int
nfe_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;

	/* device-specific */
	struct nfe_softc *sc = ifp->if_softc;
	struct nfe_desc32 *desc32 = NULL;
	struct nfe_desc64 *desc64 = NULL;

	bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map,
	    BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			/* slot is the current slot in the netmap ring */
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			NM_CHECK_ADDR_LEN(addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map;
				 * index with nic_i (the NIC ring position) */
				netmap_reload_map(sc->txq.tx_data_tag,
				    sc->txq.data[nic_i].tx_data_map, addr);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

			if (sc->nfe_flags & NFE_40BIT_ADDR) {
				desc64 = &sc->txq.desc64[nic_i];
				desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
				desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
				desc64->vtag = 0;
				desc64->length = htole16(len - 1);
				desc64->flags =
				    htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V2);
			} else {
				desc32 = &sc->txq.desc32[nic_i];
				desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
				desc32->length = htole16(len - 1);
				desc32->flags =
				    htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V1);
			}

			bus_dmamap_sync(sc->txq.tx_data_tag,
			    sc->txq.data[nic_i].tx_data_map, BUS_DMASYNC_PREWRITE);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;
		sc->txq.cur = nic_i;

		bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* XXX something missing ? where is the last pkt marker ? */
		NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		u_int nic_cur = sc->txq.cur;
		nic_i = sc->txq.next;
		for (n = 0; nic_i != nic_cur; n++, NFE_INC(nic_i, NFE_TX_RING_COUNT)) {
			uint16_t flags;
			if (sc->nfe_flags & NFE_40BIT_ADDR) {
				desc64 = &sc->txq.desc64[nic_i];
				flags = le16toh(desc64->flags);
			} else {
				desc32 = &sc->txq.desc32[nic_i];
				flags = le16toh(desc32->flags);
			}
			if (flags & NFE_TX_VALID)
				break;
		}
		if (n > 0) {
			sc->txq.next = nic_i;
			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
		}
	}

	nm_txsync_finalize(kring);

	return 0;
}


/*
 * Reconcile kernel and user view of the receive ring.
 */
static int
nfe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	/* device-specific */
	struct nfe_softc *sc = ifp->if_softc;
	struct nfe_desc32 *desc32;
	struct nfe_desc64 *desc64;

	if (head > lim)
		return netmap_ring_reinit(kring);

	bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		uint16_t flags, len;
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = sc->rxq.cur;
		nm_i = netmap_idx_n2k(kring, nic_i);
		for (n = 0; ; n++) {
			/* walk the ring with nic_i, not sc->rxq.cur,
			 * which only advances after the loop */
			if (sc->nfe_flags & NFE_40BIT_ADDR) {
				desc64 = &sc->rxq.desc64[nic_i];
				flags = le16toh(desc64->flags);
				len = le16toh(desc64->length) & NFE_RX_LEN_MASK;
			} else {
				desc32 = &sc->rxq.desc32[nic_i];
				flags = le16toh(desc32->flags);
				len = le16toh(desc32->length) & NFE_RX_LEN_MASK;
			}

			if (flags & NFE_RX_READY)
				break;

			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			bus_dmamap_sync(sc->rxq.rx_data_tag,
			    sc->rxq.data[nic_i].rx_data_map,
			    BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
			sc->rxq.cur = nic_i;
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			if (addr == netmap_buffer_base) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(sc->rxq.rx_data_tag,
				    sc->rxq.data[nic_i].rx_data_map, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			if (sc->nfe_flags & NFE_40BIT_ADDR) {
				desc64 = &sc->rxq.desc64[nic_i];
				desc64->physaddr[0] =
				    htole32(NFE_ADDR_HI(paddr));
				desc64->physaddr[1] =
				    htole32(NFE_ADDR_LO(paddr));
				desc64->length = htole16(NETMAP_BUF_SIZE);
				desc64->flags = htole16(NFE_RX_READY);
			} else {
				desc32 = &sc->rxq.desc32[nic_i];
				desc32->physaddr =
				    htole32(NFE_ADDR_LO(paddr));
				desc32->length = htole16(NETMAP_BUF_SIZE);
				desc32->flags = htole16(NFE_RX_READY);
			}

			bus_dmamap_sync(sc->rxq.rx_data_tag,
			    sc->rxq.data[nic_i].rx_data_map,
			    BUS_DMASYNC_PREREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;
		bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	/* tell userspace that there might be new packets */
	nm_rxsync_finalize(kring);

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


static void
nfe_netmap_attach(struct nfe_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = sc->nfe_ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = NFE_TX_RING_COUNT;
	na.num_rx_desc = NFE_RX_RING_COUNT;
	na.nm_txsync = nfe_netmap_txsync;
	na.nm_rxsync = nfe_netmap_rxsync;
	na.nm_register = nfe_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = 1;
	netmap_attach(&na);	/* single-argument form, as in the other drivers */
}

/* end of file */
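
Editor's note: the nfe descriptors split a DMA address into two 32-bit words via NFE_ADDR_HI/LO. A sketch with assumed macro bodies (the driver's own definitions live in the nfe register header, so treat these as illustrative stand-ins):

#include <stdint.h>
#include <stdio.h>

/* Assumed equivalents of the driver macros, for illustration only. */
#define NFE_ADDR_HI(x)	((uint32_t)((uint64_t)(x) >> 32))
#define NFE_ADDR_LO(x)	((uint32_t)((uint64_t)(x) & 0xffffffffu))

int main(void)
{
	uint64_t paddr = 0x12abcdef00ULL;	/* a 40-bit bus address */

	/* physaddr[0] gets the high word, physaddr[1] the low word,
	 * matching the desc64 fills in the file above */
	printf("physaddr[0]=0x%08x physaddr[1]=0x%08x\n",
	    NFE_ADDR_HI(paddr), NFE_ADDR_LO(paddr));
	return 0;
}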
383
netmap/sys/dev/netmap/if_re_netmap.h
Normal file
383
netmap/sys/dev/netmap/if_re_netmap.h
Normal file
@ -0,0 +1,383 @@
|
||||
/*
|
||||
* Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* $FreeBSD: head/sys/dev/netmap/if_re_netmap.h 234225 2012-04-13 15:33:12Z luigi $
|
||||
*
|
||||
* netmap support for: re
|
||||
*
|
||||
* For more details on netmap support please see ixgbe_netmap.h
|
||||
*/
|
||||
|
||||
|
||||
#include <net/netmap.h>
|
||||
#include <sys/selinfo.h>
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h> /* vtophys ? */
|
||||
#include <dev/netmap/netmap_kern.h>
|
||||
|
||||
|
||||
/*
|
||||
* Register/unregister. We are already under netmap lock.
|
||||
*/
|
||||
static int
|
||||
re_netmap_reg(struct netmap_adapter *na, int onoff)
|
||||
{
|
||||
struct ifnet *ifp = na->ifp;
|
||||
struct rl_softc *adapter = ifp->if_softc;
|
||||
|
||||
RL_LOCK(adapter);
|
||||
re_stop(adapter); /* also clears IFF_DRV_RUNNING */
|
||||
if (onoff) {
|
||||
nm_set_native_flags(na);
|
||||
} else {
|
||||
nm_clear_native_flags(na);
|
||||
}
|
||||
re_init_locked(adapter); /* also enables intr */
|
||||
RL_UNLOCK(adapter);
|
||||
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Reconcile kernel and user view of the transmit ring.
|
||||
*/
|
||||
static int
|
||||
re_netmap_txsync(struct netmap_kring *kring, int flags)
|
||||
{
|
||||
struct netmap_adapter *na = kring->na;
|
||||
struct ifnet *ifp = na->ifp;
|
||||
struct netmap_ring *ring = kring->ring;
|
||||
u_int nm_i; /* index into the netmap ring */
|
||||
u_int nic_i; /* index into the NIC ring */
|
||||
u_int n;
|
||||
u_int const lim = kring->nkr_num_slots - 1;
|
||||
u_int const head = kring->rhead;
|
||||
|
||||
/* device-specific */
|
||||
struct rl_softc *sc = ifp->if_softc;
|
||||
struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
|
||||
|
||||
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
|
||||
sc->rl_ldata.rl_tx_list_map,
|
||||
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); // XXX extra postwrite ?
|
||||
|
||||
/*
|
||||
* First part: process new packets to send.
|
||||
*/
|
||||
nm_i = kring->nr_hwcur;
|
||||
if (nm_i != head) { /* we have new packets to send */
|
||||
nic_i = sc->rl_ldata.rl_tx_prodidx;
|
||||
// XXX or netmap_idx_k2n(kring, nm_i);
|
||||
|
||||
for (n = 0; nm_i != head; n++) {
|
||||
struct netmap_slot *slot = &ring->slot[nm_i];
|
||||
u_int len = slot->len;
|
||||
uint64_t paddr;
|
||||
void *addr = PNMB(slot, &paddr);
|
||||
|
||||
/* device-specific */
|
||||
struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[nic_i];
|
||||
int cmd = slot->len | RL_TDESC_CMD_EOF |
|
||||
RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ;
|
||||
|
||||
NM_CHECK_ADDR_LEN(addr, len);
|
||||
|
||||
if (nic_i == lim) /* mark end of ring */
|
||||
cmd |= RL_TDESC_CMD_EOR;
|
||||
|
||||
if (slot->flags & NS_BUF_CHANGED) {
|
||||
/* buffer has changed, reload map */
|
||||
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
|
||||
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
|
||||
netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
|
||||
txd[nic_i].tx_dmamap, addr);
|
||||
}
|
||||
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
|
||||
|
||||
/* Fill the slot in the NIC ring. */
|
||||
desc->rl_cmdstat = htole32(cmd);
|
||||
|
||||
/* make sure changes to the buffer are synced */
|
||||
bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
|
||||
txd[nic_i].tx_dmamap,
|
||||
BUS_DMASYNC_PREWRITE);
|
||||
|
||||
nm_i = nm_next(nm_i, lim);
|
||||
nic_i = nm_next(nic_i, lim);
|
||||
}
|
||||
sc->rl_ldata.rl_tx_prodidx = nic_i;
|
||||
kring->nr_hwcur = head;
|
||||
|
||||
/* synchronize the NIC ring */
|
||||
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
|
||||
sc->rl_ldata.rl_tx_list_map,
|
||||
BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
|
||||
|
||||
/* start ? */
|
||||
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
|
||||
}
|
||||
|
||||
/*
|
||||
* Second part: reclaim buffers for completed transmissions.
|
||||
*/
|
||||
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
|
||||
nic_i = sc->rl_ldata.rl_tx_considx;
|
||||
for (n = 0; nic_i != sc->rl_ldata.rl_tx_prodidx;
|
||||
n++, nic_i = RL_TX_DESC_NXT(sc, nic_i)) {
|
||||
uint32_t cmdstat =
|
||||
le32toh(sc->rl_ldata.rl_tx_list[nic_i].rl_cmdstat);
|
||||
if (cmdstat & RL_TDESC_STAT_OWN)
|
||||
break;
|
||||
}
|
||||
if (n > 0) {
|
||||
sc->rl_ldata.rl_tx_considx = nic_i;
|
||||
sc->rl_ldata.rl_tx_free += n;
|
||||
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
|
||||
}
|
||||
}
|
||||
|
||||
nm_txsync_finalize(kring);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Reconcile kernel and user view of the receive ring.
|
||||
*/
|
||||
static int
|
||||
re_netmap_rxsync(struct netmap_kring *kring, int flags)
|
||||
{
|
||||
struct netmap_adapter *na = kring->na;
|
||||
struct ifnet *ifp = na->ifp;
|
||||
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	/* device-specific */
	struct rl_softc *sc = ifp->if_softc;
	struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;

	if (head > lim)
		return netmap_ring_reinit(kring);

	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
	    sc->rl_ldata.rl_rx_list_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 *
	 * This device uses all the buffers in the ring, so we need
	 * another termination condition in addition to RL_RDESC_STAT_OWN
	 * cleared (all buffers could have it cleared). The easiest one
	 * is to stop right before nm_hwcur.
	 */
	if (netmap_no_pendintr || force_update) {
		uint16_t slot_flags = kring->nkr_slot_flags;
		uint32_t stop_i = nm_prev(kring->nr_hwcur, lim);

		nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
		nm_i = netmap_idx_n2k(kring, nic_i);

		while (nm_i != stop_i) {
			struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[nic_i];
			uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
			uint32_t total_len;

			if ((rxstat & RL_RDESC_STAT_OWN) != 0)
				break;
			total_len = rxstat & sc->rl_rxlenmask;
			/* XXX subtract crc */
			total_len = (total_len < 4) ? 0 : total_len - 4;
			ring->slot[nm_i].len = total_len;
			ring->slot[nm_i].flags = slot_flags;
			/* sync was in re_newbuf() */
			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
			    rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD);
			// sc->rl_ifp->if_ipackets++;
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		sc->rl_ldata.rl_rx_prodidx = nic_i;
		kring->nr_hwtail = nm_i;
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[nic_i];
			int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN;

			if (addr == netmap_buffer_base) /* bad buf */
				goto ring_reset;

			if (nic_i == lim) /* mark end of ring */
				cmd |= RL_RDESC_CMD_EOR;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
				desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
				netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
				    rxd[nic_i].rx_dmamap, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			desc->rl_cmdstat = htole32(cmd);
			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
			    rxd[nic_i].rx_dmamap,
			    BUS_DMASYNC_PREREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
		    sc->rl_ldata.rl_rx_list_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	/* tell userspace that there might be new packets */
	nm_rxsync_finalize(kring);

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


/*
 * Additional routines to init the tx and rx rings.
 * In other drivers we do that inline in the main code.
 */
static void
re_netmap_tx_init(struct rl_softc *sc)
{
	struct rl_txdesc *txd;
	struct rl_desc *desc;
	int i, n;
	struct netmap_adapter *na = NA(sc->rl_ifp);
	struct netmap_slot *slot;

	if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
		return;
	}

	slot = netmap_reset(na, NR_TX, 0, 0);
	/* slot is NULL if we are not in netmap mode */
	if (!slot)
		return; // XXX cannot happen
	/* in netmap mode, overwrite addresses and maps */
	txd = sc->rl_ldata.rl_tx_desc;
	desc = sc->rl_ldata.rl_tx_list;
	n = sc->rl_ldata.rl_tx_desc_cnt;

	/* l points in the netmap ring, i points in the NIC ring */
	for (i = 0; i < n; i++) {
		uint64_t paddr;
		int l = netmap_idx_n2k(&na->tx_rings[0], i);
		void *addr = PNMB(slot + l, &paddr);

		desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
		desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
		netmap_load_map(sc->rl_ldata.rl_tx_mtag,
		    txd[i].tx_dmamap, addr);
	}
}

static void
re_netmap_rx_init(struct rl_softc *sc)
{
	struct netmap_adapter *na = NA(sc->rl_ifp);
	struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
	struct rl_desc *desc = sc->rl_ldata.rl_rx_list;
	uint32_t cmdstat;
	uint32_t nic_i, max_avail;
	uint32_t const n = sc->rl_ldata.rl_rx_desc_cnt;

	if (!slot)
		return;
	/*
	 * Do not release the slots owned by userspace,
	 * and also keep one empty.
	 */
	max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
	for (nic_i = 0; nic_i < n; nic_i++) {
		void *addr;
		uint64_t paddr;
		uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i);

		addr = PNMB(slot + nm_i, &paddr);

		netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
		    sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, addr);
		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
		    sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, BUS_DMASYNC_PREREAD);
		desc[nic_i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
		desc[nic_i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
		cmdstat = NETMAP_BUF_SIZE;
		if (nic_i == n - 1) /* mark the end of ring */
			cmdstat |= RL_RDESC_CMD_EOR;
		if (nic_i < max_avail)
			cmdstat |= RL_RDESC_CMD_OWN;
		desc[nic_i].rl_cmdstat = htole32(cmdstat);
	}
}


static void
re_netmap_attach(struct rl_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = sc->rl_ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = sc->rl_ldata.rl_tx_desc_cnt;
	na.num_rx_desc = sc->rl_ldata.rl_rx_desc_cnt;
	na.nm_txsync = re_netmap_txsync;
	na.nm_rxsync = re_netmap_rxsync;
	na.nm_register = re_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = 1;
	netmap_attach(&na);
}

/* end of file */
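For context, a userspace receive loop against a driver like this might look like the sketch below. It is illustrative only, assuming the public netmap_user.h helpers (nm_open, NETMAP_RXRING, nm_ring_next); "re0" is an example interface name.

/* Sketch: minimal userspace rx loop over a netmap-enabled interface. */
#include <poll.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

static int
rx_loop_sketch(void)
{
	struct nm_desc *d = nm_open("netmap:re0", NULL, 0, NULL); /* "re0" is an example */
	struct pollfd pfd;
	int i;

	if (d == NULL)
		return -1;
	pfd.fd = d->fd;
	pfd.events = POLLIN;
	for (;;) {
		if (poll(&pfd, 1, 1000) <= 0)	/* wakes up after rxsync reports packets */
			continue;
		for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) {
			struct netmap_ring *ring = NETMAP_RXRING(d->nifp, i);

			while (!nm_ring_empty(ring)) {
				struct netmap_slot *slot = &ring->slot[ring->cur];
				char *buf = NETMAP_BUF(ring, slot->buf_idx);

				(void)buf;	/* process slot->len bytes here */
				ring->head = ring->cur = nm_ring_next(ring, ring->cur);
			}
		}
	}
	nm_close(d);
	return 0;
}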
495
netmap/sys/dev/netmap/ixgbe_netmap.h
Normal file
@ -0,0 +1,495 @@
/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: head/sys/dev/netmap/ixgbe_netmap.h 244514 2012-12-20 22:26:03Z luigi $
 *
 * netmap support for: ixgbe
 *
 * This file is meant to be a reference on how to implement
 * netmap support for a network driver.
 * This file contains code but only static or inline functions used
 * by a single driver. To avoid replication of code we just #include
 * it near the beginning of the standard driver.
 */


#include <net/netmap.h>
#include <sys/selinfo.h>
/*
 * Some drivers may need the following headers. Others
 * already include them by default

#include <vm/vm.h>
#include <vm/pmap.h>

*/
#include <dev/netmap/netmap_kern.h>


/*
 * device-specific sysctl variables:
 *
 * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
 *	During regular operations the CRC is stripped, but on some
 *	hardware reception of frames whose length is not a multiple
 *	of 64 is slower, so using crcstrip=0 helps in benchmarks.
 *
 * ix_rx_miss, ix_rx_miss_bufs:
 *	count packets that might be missed due to lost interrupts.
 */
SYSCTL_DECL(_dev_netmap);
static int ix_rx_miss, ix_rx_miss_bufs, ix_crcstrip;
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
    CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
    CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
    CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs");
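Since these knobs are declared under _dev_netmap, they surface as dev.netmap.* OIDs. A small userspace sketch for toggling ix_crcstrip (the OID string follows from the SYSCTL_DECL above; error handling trimmed):

/* Sketch: flip dev.netmap.ix_crcstrip from userspace via sysctlbyname(3). */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int val = 1, old;
	size_t len = sizeof(old);

	if (sysctlbyname("dev.netmap.ix_crcstrip", &old, &len, &val, sizeof(val)) == -1) {
		perror("sysctlbyname");
		return 1;
	}
	printf("ix_crcstrip: %d -> %d\n", old, val);
	return 0;
}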
static void
set_crcstrip(struct ixgbe_hw *hw, int onoff)
{
	/* crc stripping is set in two places:
	 * IXGBE_HLREG0 (modified on init_locked and hw reset)
	 * IXGBE_RDRXCTL (set by the original driver in
	 *	ixgbe_setup_hw_rsc() called in init_locked.
	 *	We disable the setting when netmap is compiled in).
	 * We update the values here, but also in ixgbe.c because
	 * init_locked sometimes is called outside our control.
	 */
	uint32_t hl, rxc;

	hl = IXGBE_READ_REG(hw, IXGBE_HLREG0);
	rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	if (netmap_verbose)
		D("%s read  HLREG 0x%x rxc 0x%x",
			onoff ? "enter" : "exit", hl, rxc);
	/* hw requirements ... */
	rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
	rxc |= IXGBE_RDRXCTL_RSCACKC;
	if (onoff && !ix_crcstrip) {
		/* keep the crc. Fast rx */
		hl &= ~IXGBE_HLREG0_RXCRCSTRP;
		rxc &= ~IXGBE_RDRXCTL_CRCSTRIP;
	} else {
		/* reset default mode */
		hl |= IXGBE_HLREG0_RXCRCSTRP;
		rxc |= IXGBE_RDRXCTL_CRCSTRIP;
	}
	if (netmap_verbose)
		D("%s write HLREG 0x%x rxc 0x%x",
			onoff ? "enter" : "exit", hl, rxc);
	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl);
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
}


/*
 * Register/unregister. We are already under netmap lock.
 * Only called on the first register or the last unregister.
 */
static int
ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct adapter *adapter = ifp->if_softc;

	IXGBE_CORE_LOCK(adapter);
	ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ?

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	set_crcstrip(&adapter->hw, onoff);
	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	ixgbe_init_locked(adapter);	/* also enables intr */
	set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
	IXGBE_CORE_UNLOCK(adapter);
	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}


/*
 * Reconcile kernel and user view of the transmit ring.
 *
 * All information is in the kring.
 * Userspace wants to send packets up to the one before kring->rhead,
 * kernel knows kring->nr_hwcur is the first unsent packet.
 *
 * Here we push packets out (as many as possible), and possibly
 * reclaim buffers from previously completed transmission.
 *
 * The caller (netmap) guarantees that there is only one instance
 * running at any time. Any interference with other driver
 * methods should be handled by the individual drivers.
 */
static int
ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/*
	 * interrupts on every tx packet are expensive so request
	 * them every half ring, or where NS_REPORT is set
	 */
	u_int report_frequency = kring->nkr_num_slots >> 1;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
	int reclaim_tx;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 * nm_i is the current index in the netmap ring,
	 * nic_i is the corresponding index in the NIC ring.
	 * The two numbers differ because upon a *_init() we reset
	 * the NIC ring but leave the netmap ring unchanged.
	 * For the transmit ring, we have
	 *
	 *		nm_i = kring->nr_hwcur
	 *		nic_i = IXGBE_TDT (not tracked in the driver)
	 * and
	 *		nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 *
	 * In this driver kring->nkr_hwofs >= 0, but for other
	 * drivers it might be negative as well.
	 */

	/*
	 * If we have packets to send (kring->nr_hwcur != kring->rhead)
	 * iterate over the netmap ring, fetch length and update
	 * the corresponding slot in the NIC ring. Some drivers also
	 * need to update the buffer's physical address in the NIC slot
	 * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
	 *
	 * The netmap_reload_map() call is especially expensive,
	 * even when (as in this case) the tag is 0, so do it only
	 * when the buffer has actually changed.
	 *
	 * If possible do not set the report/intr bit on all slots,
	 * but only a few times per ring or when NS_REPORT is set.
	 *
	 * Finally, on 10G and faster drivers, it might be useful
	 * to prefetch the next slot and txr entry.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);

		__builtin_prefetch(&ring->slot[nm_i]);
		__builtin_prefetch(&txr->tx_buffers[nic_i]);

		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			/* device-specific */
			union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
			struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
			int flags = (slot->flags & NS_REPORT ||
				nic_i == 0 || nic_i == report_frequency) ?
				IXGBE_TXD_CMD_RS : 0;

			/* prefetch for next round */
			__builtin_prefetch(&ring->slot[nm_i + 1]);
			__builtin_prefetch(&txr->tx_buffers[nic_i + 1]);

			NM_CHECK_ADDR_LEN(addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(txr->txtag, txbuf->map, addr);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

			/* Fill the slot in the NIC ring. */
			/* Use legacy descriptor, they are faster? */
			curr->read.buffer_addr = htole64(paddr);
			curr->read.olinfo_status = 0;
			curr->read.cmd_type_len = htole32(len | flags |
				IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);

			/* make sure changes to the buffer are synced */
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_PREWRITE);

			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 * Because this is expensive (we read a NIC register etc.)
	 * we only do it in specific cases (see below).
	 */
	if (flags & NAF_FORCE_RECLAIM) {
		reclaim_tx = 1; /* forced reclaim */
	} else if (!nm_kr_txempty(kring)) {
		reclaim_tx = 0; /* have buffers, no reclaim */
	} else {
		/*
		 * No buffers available. Locate previous slot with
		 * REPORT_STATUS set.
		 * If the slot has DD set, we can reclaim space,
		 * otherwise wait for the next interrupt.
		 * This enables interrupt moderation on the tx
		 * side though it might reduce throughput.
		 */
		struct ixgbe_legacy_tx_desc *txd =
		    (struct ixgbe_legacy_tx_desc *)txr->tx_base;

		nic_i = txr->next_to_clean + report_frequency;
		if (nic_i > lim)
			nic_i -= lim + 1;
		// round to the closest with dd set
		nic_i = (nic_i < kring->nkr_num_slots / 4 ||
			 nic_i >= kring->nkr_num_slots*3/4) ?
			0 : report_frequency;
		reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD;	// XXX cpu_to_le32 ?
	}
	if (reclaim_tx) {
		/*
		 * Record completed transmissions.
		 * We (re)use the driver's txr->next_to_clean to keep
		 * track of the most recently completed transmission.
		 *
		 * The datasheet discourages the use of TDH to find
		 * out the number of sent packets, but we only set
		 * REPORT_STATUS in a few slots so TDH is the only
		 * good way.
		 */
		nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id));
		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
			D("TDH wrap %d", nic_i);
			nic_i -= kring->nkr_num_slots;
		}
		if (nic_i != txr->next_to_clean) {
			/* some tx completed, increment avail */
			txr->next_to_clean = nic_i;
			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
		}
	}

	nm_txsync_finalize(kring);

	return 0;
}
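The nm_i/nic_i relation documented in the comments above can be captured in a small pair of helpers. The sketch below is illustrative only (the "sketch_" names are hypothetical, not part of the driver); the extra "+ ring_size" keeps the modulo non-negative when hwofs < 0, which the comment says can happen in other drivers.

/* Sketch of the index mapping described above:
 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size.
 */
static inline u_int
sketch_idx_n2k(u_int nic_i, int hwofs, u_int ring_size)
{
	return (nic_i + hwofs + ring_size) % ring_size;
}

/* Advance an index with wraparound; lim == num_slots - 1. */
static inline u_int
sketch_nm_next(u_int i, u_int lim)
{
	return (i == lim) ? 0 : i + 1;
}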
/*
 * Reconcile kernel and user view of the receive ring.
 * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocation, but races against
 * the rest of the driver should be handled here.
 *
 * On call, kring->rhead is the first packet that userspace wants
 * to keep, and kring->rcur is the wakeup point.
 * The kernel has previously reported packets up to kring->rtail.
 *
 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
 * of whether or not we received an interrupt.
 */
static int
ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];

	if (head > lim)
		return netmap_ring_reinit(kring);

	/* XXX check sync modes */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 *
	 * nm_i is the index of the next free slot in the netmap ring,
	 * nic_i is the index of the next received packet in the NIC ring,
	 * and they may differ in case if_init() has been called while
	 * in netmap mode. For the receive ring we have
	 *
	 *	nic_i = rxr->next_to_check;
	 *	nm_i = kring->nr_hwtail (previous)
	 * and
	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 *
	 * rxr->next_to_check is set to 0 on a ring reinit
	 */
	if (netmap_no_pendintr || force_update) {
		int crclen = ix_crcstrip ? 0 : 4;
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
		nm_i = netmap_idx_n2k(kring, nic_i);

		for (n = 0; ; n++) {
			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
			uint32_t staterr = le32toh(curr->wb.upper.status_error);

			if ((staterr & IXGBE_RXD_STAT_DD) == 0)
				break;
			ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
			ring->slot[nm_i].flags = slot_flags;
			bus_dmamap_sync(rxr->ptag,
			    rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
			if (netmap_no_pendintr && !force_update) {
				/* diagnostics */
				ix_rx_miss++;
				ix_rx_miss_bufs += n;
			}
			rxr->next_to_check = nic_i;
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 * (kring->nr_hwcur to kring->rhead excluded),
	 * and make the buffers available for reception.
	 * As usual nm_i is the index in the netmap ring,
	 * nic_i is the index in the NIC ring, and
	 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
			struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];

			if (addr == netmap_buffer_base) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			curr->wb.upper.status_error = 0;
			curr->read.pkt_addr = htole64(paddr);
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move nic_i back by one unit
		 */
		nic_i = nm_prev(nic_i, lim);
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i);
	}

	/* tell userspace that there might be new packets */
	nm_rxsync_finalize(kring);

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


/*
 * The attach routine, called near the end of ixgbe_attach(),
 * fills the parameters for netmap_attach() and calls it.
 * It cannot fail, in the worst case (such as no memory)
 * netmap mode will be disabled and the driver will only
 * operate in standard mode.
 */
static void
ixgbe_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = ixgbe_netmap_txsync;
	na.nm_rxsync = ixgbe_netmap_rxsync;
	na.nm_register = ixgbe_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
	netmap_attach(&na);
}

/* end of file */
2616
netmap/sys/dev/netmap/netmap.c
Normal file
File diff suppressed because it is too large
657
netmap/sys/dev/netmap/netmap_freebsd.c
Normal file
@ -0,0 +1,657 @@
/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */

#include <sys/types.h>
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/poll.h>	/* POLLIN, POLLOUT */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* DEV_MODULE */
#include <sys/endian.h>

#include <sys/rwlock.h>

#include <vm/vm.h>      /* vtophys */
#include <vm/pmap.h>    /* vtophys */
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>


#include <sys/malloc.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>        /* bus_dmamap_* */
#include <netinet/in.h>		/* in6_cksum_pseudo() */
#include <machine/in_cksum.h>  /* in_pseudo(), in_cksum_hdr() */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>


/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */

rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
{
	/* TODO XXX please use the FreeBSD implementation for this. */
	uint16_t *words = (uint16_t *)data;
	int nw = len / 2;
	int i;

	for (i = 0; i < nw; i++)
		cur_sum += be16toh(words[i]);

	if (len & 1)
		cur_sum += (data[len-1] << 8);

	return cur_sum;
}

/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
 * return value is in network byte order.
 */
uint16_t nm_csum_fold(rawsum_t cur_sum)
{
	/* TODO XXX please use the FreeBSD implementation for this. */
	while (cur_sum >> 16)
		cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16);

	return htobe16((~cur_sum) & 0xFFFF);
}

uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
{
#if 0
	return in_cksum_hdr((void *)iph);
#else
	return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
#endif
}

void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
    size_t datalen, uint16_t *check)
{
	uint16_t pseudolen = datalen + iph->protocol;

	/* Compute and insert the pseudo-header checksum. */
	*check = in_pseudo(iph->saddr, iph->daddr,
	    htobe16(pseudolen));
	/* Compute the checksum on TCP/UDP header + payload
	 * (includes the pseudo-header).
	 */
	*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
}

void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
    size_t datalen, uint16_t *check)
{
#ifdef INET6
	*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
	*check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
#else
	static int notsupported = 0;
	if (!notsupported) {
		notsupported = 1;
		D("inet6 segmentation not supported");
	}
#endif
}
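A minimal usage sketch of the two helpers above: nm_csum_raw() accumulates a host-order partial ones-complement sum, and nm_csum_fold() folds it to a 16-bit network-order checksum. The byte values are arbitrary.

/* Sketch: checksum a small buffer with the helpers defined above. */
static uint16_t
csum_sketch(void)
{
	uint8_t buf[] = { 0x45, 0x00, 0x00, 0x1c, 0x00, 0x00 }; /* arbitrary bytes */
	rawsum_t sum = nm_csum_raw(buf, sizeof(buf), 0); /* host-order partial sum */

	return nm_csum_fold(sum); /* folded result, network byte order */
}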
/*
 * Intercept the rx routine in the standard device driver.
 * Second argument is non-zero to intercept, 0 to restore
 */
int
netmap_catch_rx(struct netmap_adapter *na, int intercept)
{
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	struct ifnet *ifp = na->ifp;

	if (intercept) {
		if (gna->save_if_input) {
			D("cannot intercept again");
			return EINVAL; /* already set */
		}
		gna->save_if_input = ifp->if_input;
		ifp->if_input = generic_rx_handler;
	} else {
		if (!gna->save_if_input) {
			D("cannot restore");
			return EINVAL; /* not saved */
		}
		ifp->if_input = gna->save_if_input;
		gna->save_if_input = NULL;
	}

	return 0;
}


/*
 * Intercept the packet steering routine in the tx path,
 * so that we can decide which queue is used for an mbuf.
 * Second argument is non-zero to intercept, 0 to restore.
 * On FreeBSD we just intercept if_transmit.
 */
void
netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
{
	struct netmap_adapter *na = &gna->up.up;
	struct ifnet *ifp = na->ifp;

	if (enable) {
		na->if_transmit = ifp->if_transmit;
		ifp->if_transmit = netmap_transmit;
	} else {
		ifp->if_transmit = na->if_transmit;
	}
}


/*
 * Transmit routine used by generic_netmap_txsync(). Returns 0 on success
 * and non-zero on error (which may be packet drops or other errors).
 * addr and len identify the netmap buffer, m is the (preallocated)
 * mbuf to use for transmissions.
 *
 * We should add a reference to the mbuf so the m_freem() at the end
 * of the transmission does not consume resources.
 *
 * On FreeBSD, and on multiqueue cards, we can force the queue using
 *      if ((m->m_flags & M_FLOWID) != 0)
 *              i = m->m_pkthdr.flowid % adapter->num_queues;
 *      else
 *              i = curcpu % adapter->num_queues;
 *
 */
int
generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
    void *addr, u_int len, u_int ring_nr)
{
	int ret;

	m->m_len = m->m_pkthdr.len = 0;

	// copy data to the mbuf
	m_copyback(m, 0, len, addr);
	// inc refcount. We are alone, so we can skip the atomic
	atomic_fetchadd_int(m->m_ext.ref_cnt, 1);
	m->m_flags |= M_FLOWID;
	m->m_pkthdr.flowid = ring_nr;
	m->m_pkthdr.rcvif = ifp; /* used for tx notification */
	ret = NA(ifp)->if_transmit(ifp, m);
	return ret;
}


/*
 * The following two functions are empty until we have a generic
 * way to extract the info from the ifp
 */
int
generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
	D("called");
	return 0;
}


void
generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
{
	D("called");
	*txq = netmap_generic_rings;
	*rxq = netmap_generic_rings;
}


void netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
    struct netmap_adapter *na)
{
	ND("called");
	mit->mit_pending = 0;
	mit->mit_ring_idx = idx;
	mit->mit_na = na;
}


void netmap_mitigation_start(struct nm_generic_mit *mit)
{
	ND("called");
}


void netmap_mitigation_restart(struct nm_generic_mit *mit)
{
	ND("called");
}


int netmap_mitigation_active(struct nm_generic_mit *mit)
{
	ND("called");
	return 0;
}


void netmap_mitigation_cleanup(struct nm_generic_mit *mit)
{
	ND("called");
}


/*
 * In order to track whether pages are still mapped, we hook into
 * the standard cdev_pager and intercept the constructor and
 * destructor.
 */

struct netmap_vm_handle_t {
	struct cdev		*dev;
	struct netmap_priv_d	*priv;
};


static int
netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
	struct netmap_vm_handle_t *vmh = handle;

	if (netmap_verbose)
		D("handle %p size %jd prot %d foff %jd",
			handle, (intmax_t)size, prot, (intmax_t)foff);
	dev_ref(vmh->dev);
	return 0;
}


static void
netmap_dev_pager_dtor(void *handle)
{
	struct netmap_vm_handle_t *vmh = handle;
	struct cdev *dev = vmh->dev;
	struct netmap_priv_d *priv = vmh->priv;

	if (netmap_verbose)
		D("handle %p", handle);
	netmap_dtor(priv);
	free(vmh, M_DEVBUF);
	dev_rel(dev);
}


static int
netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres)
{
	struct netmap_vm_handle_t *vmh = object->handle;
	struct netmap_priv_d *priv = vmh->priv;
	vm_paddr_t paddr;
	vm_page_t page;
	vm_memattr_t memattr;
	vm_pindex_t pidx;

	ND("object %p offset %jd prot %d mres %p",
		object, (intmax_t)offset, prot, mres);
	memattr = object->memattr;
	pidx = OFF_TO_IDX(offset);
	paddr = netmap_mem_ofstophys(priv->np_mref, offset);
	if (paddr == 0)
		return VM_PAGER_FAIL;

	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
		/*
		 * If the passed in result page is a fake page, update it with
		 * the new physical address.
		 */
		page = *mres;
		vm_page_updatefake(page, paddr, memattr);
	} else {
		/*
		 * Replace the passed in reqpage page with our own fake page and
		 * free up all of the original pages.
		 */
#ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
#define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
#endif /* VM_OBJECT_WUNLOCK */

		VM_OBJECT_WUNLOCK(object);
		page = vm_page_getfake(paddr, memattr);
		VM_OBJECT_WLOCK(object);
		vm_page_lock(*mres);
		vm_page_free(*mres);
		vm_page_unlock(*mres);
		*mres = page;
		vm_page_insert(page, object, pidx);
	}
	page->valid = VM_PAGE_BITS_ALL;
	return (VM_PAGER_OK);
}


static struct cdev_pager_ops netmap_cdev_pager_ops = {
	.cdev_pg_ctor = netmap_dev_pager_ctor,
	.cdev_pg_dtor = netmap_dev_pager_dtor,
	.cdev_pg_fault = netmap_dev_pager_fault,
};


static int
netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
    vm_size_t objsize, vm_object_t *objp, int prot)
{
	int error;
	struct netmap_vm_handle_t *vmh;
	struct netmap_priv_d *priv;
	vm_object_t obj;

	if (netmap_verbose)
		D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
		    (intmax_t)*foff, (intmax_t)objsize, objp, prot);

	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (vmh == NULL)
		return ENOMEM;
	vmh->dev = cdev;

	NMG_LOCK();
	error = devfs_get_cdevpriv((void**)&priv);
	if (error)
		goto err_unlock;
	vmh->priv = priv;
	priv->np_refcount++;
	NMG_UNLOCK();

	error = netmap_get_memory(priv);
	if (error)
		goto err_deref;

	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
	    &netmap_cdev_pager_ops, objsize, prot,
	    *foff, NULL);
	if (obj == NULL) {
		D("cdev_pager_allocate failed");
		error = EINVAL;
		goto err_deref;
	}

	*objp = obj;
	return 0;

err_deref:
	NMG_LOCK();
	priv->np_refcount--;
err_unlock:
	NMG_UNLOCK();
// err:
	free(vmh, M_DEVBUF);
	return error;
}
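For orientation, the path into netmap_mmap_single() starts in userspace: open /dev/netmap, bind with NIOCREGIF, then mmap the shared region. A minimal sketch (uses the public nmreq/NETMAP_IF API; "em0" is an example name and error handling is trimmed):

/* Sketch: userspace sequence that reaches the pager code above. */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <string.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

static struct netmap_if *
map_sketch(void)
{
	struct nmreq req;
	int fd = open("/dev/netmap", O_RDWR);
	void *mem;

	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	strncpy(req.nr_name, "em0", sizeof(req.nr_name)); /* example name */
	ioctl(fd, NIOCREGIF, &req);	/* handled by netmap_ioctl() */
	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);		/* faults served by the pager above */
	return NETMAP_IF(mem, req.nr_offset);
}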
// XXX can we remove this ?
static int
netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	if (netmap_verbose)
		D("dev %p fflag 0x%x devtype %d td %p",
			dev, fflag, devtype, td);
	return 0;
}


static int
netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct netmap_priv_d *priv;
	int error;

	(void)dev;
	(void)oflags;
	(void)devtype;
	(void)td;

	// XXX wait or nowait ?
	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (priv == NULL)
		return ENOMEM;

	error = devfs_set_cdevpriv(priv, netmap_dtor);
	if (error)
		return error;

	priv->np_refcount = 1;

	return 0;
}

/******************** kqueue support ****************/

/*
 * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
 * We use a non-zero argument to distinguish the call from the one
 * in kevent_scan() which instead also needs to run netmap_poll().
 * The knote uses a global mutex for the time being. We might
 * try to reuse the one in the si, but it is not allocated
 * permanently so it might be a bit tricky.
 *
 * The *kqfilter function registers one or another f_event
 * depending on read or write mode.
 * In the call to f_event() td_fpop is NULL so any child function
 * calling devfs_get_cdevpriv() would fail - and we need it in
 * netmap_poll(). As a workaround we store priv into kn->kn_hook
 * and pass it as the first argument to netmap_poll(), which then
 * uses the failure to tell that we are called from f_event()
 * and do not need the selrecord().
 */

void freebsd_selwakeup(struct selinfo *si, int pri);

void
freebsd_selwakeup(struct selinfo *si, int pri)
{
	if (netmap_verbose)
		D("on knote %p", &si->si_note);
	selwakeuppri(si, pri);
	/* use a non-zero hint to tell the notification from the
	 * call done in kqueue_scan() which uses 0
	 */
	KNOTE_UNLOCKED(&si->si_note, 0x100 /* notification */);
}

static void
netmap_knrdetach(struct knote *kn)
{
	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
	struct selinfo *si = priv->np_rxsi;

	D("remove selinfo %p", si);
	knlist_remove(&si->si_note, kn, 0);
}

static void
netmap_knwdetach(struct knote *kn)
{
	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
	struct selinfo *si = priv->np_txsi;

	D("remove selinfo %p", si);
	knlist_remove(&si->si_note, kn, 0);
}

/*
 * Callback for notifications (generated externally) and for our own
 * calls to kevent(). For the former we just return 1 (ready), since
 * we do not know better. For the latter we call netmap_poll() and
 * return 0/1 accordingly.
 */
static int
netmap_knrw(struct knote *kn, long hint, int events)
{
	struct netmap_priv_d *priv;
	int revents;

	if (hint != 0) {
		ND(5, "call from notify");
		return 1; /* assume we are ready */
	}
	priv = kn->kn_hook;
	/* the notification may come from an external thread,
	 * in which case we do not want to run netmap_poll().
	 * This should be filtered above, but check just in case.
	 */
	if (curthread != priv->np_td) { /* should not happen */
		RD(5, "curthread changed %p %p", curthread, priv->np_td);
		return 1;
	} else {
		revents = netmap_poll((void *)priv, events, curthread);
		return (events & revents) ? 1 : 0;
	}
}

static int
netmap_knread(struct knote *kn, long hint)
{
	return netmap_knrw(kn, hint, POLLIN);
}

static int
netmap_knwrite(struct knote *kn, long hint)
{
	return netmap_knrw(kn, hint, POLLOUT);
}

static struct filterops netmap_rfiltops = {
	.f_isfd = 1,
	.f_detach = netmap_knrdetach,
	.f_event = netmap_knread,
};

static struct filterops netmap_wfiltops = {
	.f_isfd = 1,
	.f_detach = netmap_knwdetach,
	.f_event = netmap_knwrite,
};


/*
 * This is called when a thread invokes kevent() to record
 * a change in the configuration of the kqueue().
 * The 'priv' should be the same as in the netmap device.
 */
static int
netmap_kqfilter(struct cdev *dev, struct knote *kn)
{
	struct netmap_priv_d *priv;
	int error;
	struct netmap_adapter *na;
	struct selinfo *si;
	int ev = kn->kn_filter;

	if (ev != EVFILT_READ && ev != EVFILT_WRITE) {
		D("bad filter request %d", ev);
		return 1;
	}
	error = devfs_get_cdevpriv((void**)&priv);
	if (error) {
		D("device not yet setup");
		return 1;
	}
	na = priv->np_na;
	if (na == NULL) {
		D("no netmap adapter for this file descriptor");
		return 1;
	}
	/* the si is indicated in the priv */
	si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
	// XXX lock(priv) ?
	kn->kn_fop = (ev == EVFILT_WRITE) ?
		&netmap_wfiltops : &netmap_rfiltops;
	kn->kn_hook = priv;
	knlist_add(&si->si_note, kn, 1);
	// XXX unlock(priv)
	ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s",
		na, na->ifp->if_xname, curthread, priv, kn,
		priv->np_nifp,
		kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH");
	return 0;
}

struct cdevsw netmap_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "netmap",
	.d_open = netmap_open,
	.d_mmap_single = netmap_mmap_single,
	.d_ioctl = netmap_ioctl,
	.d_poll = netmap_poll,
	.d_kqfilter = netmap_kqfilter,
	.d_close = netmap_close,
};
/*--- end of kqueue support ----*/
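From userspace, the netmap_kqfilter() hook above is exercised with the standard kqueue(2)/kevent(2) API; a minimal sketch (assumes 'fd' is a /dev/netmap descriptor already bound with NIOCREGIF):

/* Sketch: block on a netmap fd with kqueue instead of poll(). */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static int
kq_wait_sketch(int fd)
{
	int kq = kqueue();
	struct kevent ev, out;

	EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)	/* registers via netmap_kqfilter() */
		return -1;
	return kevent(kq, NULL, 0, &out, 1, NULL);	/* blocks until rx is ready */
}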
/*
 * Kernel entry point.
 *
 * Initialize/finalize the module and return.
 *
 * Return 0 on success, errno on failure.
 */
static int
netmap_loader(__unused struct module *module, int event, __unused void *arg)
{
	int error = 0;

	switch (event) {
	case MOD_LOAD:
		error = netmap_init();
		break;

	case MOD_UNLOAD:
		netmap_fini();
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}


DEV_MODULE(netmap, netmap_loader, NULL);
818
netmap/sys/dev/netmap/netmap_generic.c
Normal file
@ -0,0 +1,818 @@
|
||||
/*
|
||||
* Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This module implements netmap support on top of standard,
|
||||
* unmodified device drivers.
|
||||
*
|
||||
* A NIOCREGIF request is handled here if the device does not
|
||||
* have native support. TX and RX rings are emulated as follows:
|
||||
*
|
||||
* NIOCREGIF
|
||||
* We preallocate a block of TX mbufs (roughly as many as
|
||||
* tx descriptors; the number is not critical) to speed up
|
||||
* operation during transmissions. The refcount on most of
|
||||
* these buffers is artificially bumped up so we can recycle
|
||||
* them more easily. Also, the destructor is intercepted
|
||||
* so we use it as an interrupt notification to wake up
|
||||
* processes blocked on a poll().
|
||||
*
|
||||
* For each receive ring we allocate one "struct mbq"
|
||||
* (an mbuf tailq plus a spinlock). We intercept packets
|
||||
* (through if_input)
|
||||
* on the receive path and put them in the mbq from which
|
||||
* netmap receive routines can grab them.
|
||||
*
|
||||
* TX:
|
||||
* in the generic_txsync() routine, netmap buffers are copied
|
||||
* (or linked, in a future) to the preallocated mbufs
|
||||
* and pushed to the transmit queue. Some of these mbufs
|
||||
* (those with NS_REPORT, or otherwise every half ring)
|
||||
* have the refcount=1, others have refcount=2.
|
||||
* When the destructor is invoked, we take that as
|
||||
* a notification that all mbufs up to that one in
|
||||
* the specific ring have been completed, and generate
|
||||
* the equivalent of a transmit interrupt.
|
||||
*
|
||||
* RX:
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
|
||||
#include <sys/cdefs.h> /* prerequisite */
|
||||
__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257666 2013-11-05 01:06:22Z luigi $");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/lock.h> /* PROT_EXEC */
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/socket.h> /* sockaddrs */
|
||||
#include <sys/selinfo.h>
|
||||
#include <net/if.h>
|
||||
#include <net/if_var.h>
|
||||
#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */
|
||||
|
||||
// XXX temporary - D() defined here
|
||||
#include <net/netmap.h>
|
||||
#include <dev/netmap/netmap_kern.h>
|
||||
#include <dev/netmap/netmap_mem2.h>
|
||||
|
||||
#define rtnl_lock() D("rtnl_lock called");
|
||||
#define rtnl_unlock() D("rtnl_unlock called");
|
||||
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
|
||||
#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
|
||||
#define smp_mb()
|
||||
|
||||
/*
|
||||
* mbuf wrappers
|
||||
*/
|
||||
|
||||
/*
|
||||
* we allocate an EXT_PACKET
|
||||
*/
|
||||
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
|
||||
|
||||
/* mbuf destructor, also need to change the type to EXT_EXTREF,
|
||||
* add an M_NOFREE flag, and then clear the flag and
|
||||
* chain into uma_zfree(zone_pack, mf)
|
||||
* (or reinstall the buffer ?)
|
||||
*/
|
||||
#define SET_MBUF_DESTRUCTOR(m, fn) do { \
|
||||
(m)->m_ext.ext_free = (void *)fn; \
|
||||
(m)->m_ext.ext_type = EXT_EXTREF; \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
|
||||
|
||||
|
||||
|
||||
#else /* linux */
|
||||
|
||||
#include "bsd_glue.h"
|
||||
|
||||
#include <linux/rtnetlink.h> /* rtnl_[un]lock() */
|
||||
#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
|
||||
#include <linux/hrtimer.h>
|
||||
|
||||
//#define REG_RESET
|
||||
|
||||
#endif /* linux */
|
||||
|
||||
|
||||
/* Common headers. */
|
||||
#include <net/netmap.h>
|
||||
#include <dev/netmap/netmap_kern.h>
|
||||
#include <dev/netmap/netmap_mem2.h>
|
||||
|
||||
|
||||
|
||||
/* ======================== usage stats =========================== */
|
||||
|
||||
#ifdef RATE_GENERIC
|
||||
#define IFRATE(x) x
|
||||
struct rate_stats {
|
||||
unsigned long txpkt;
|
||||
unsigned long txsync;
|
||||
unsigned long txirq;
|
||||
unsigned long rxpkt;
|
||||
unsigned long rxirq;
|
||||
unsigned long rxsync;
|
||||
};
|
||||
|
||||
struct rate_context {
|
||||
unsigned refcount;
|
||||
struct timer_list timer;
|
||||
struct rate_stats new;
|
||||
struct rate_stats old;
|
||||
};
|
||||
|
||||
#define RATE_PRINTK(_NAME_) \
|
||||
printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
|
||||
#define RATE_PERIOD 2
|
||||
static void rate_callback(unsigned long arg)
|
||||
{
|
||||
struct rate_context * ctx = (struct rate_context *)arg;
|
||||
struct rate_stats cur = ctx->new;
|
||||
int r;
|
||||
|
||||
RATE_PRINTK(txpkt);
|
||||
RATE_PRINTK(txsync);
|
||||
RATE_PRINTK(txirq);
|
||||
RATE_PRINTK(rxpkt);
|
||||
RATE_PRINTK(rxsync);
|
||||
RATE_PRINTK(rxirq);
|
||||
printk("\n");
|
||||
|
||||
ctx->old = cur;
|
||||
r = mod_timer(&ctx->timer, jiffies +
|
||||
msecs_to_jiffies(RATE_PERIOD * 1000));
|
||||
if (unlikely(r))
|
||||
D("[v1000] Error: mod_timer()");
|
||||
}
|
||||
|
||||
static struct rate_context rate_ctx;
|
||||
|
||||
void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
|
||||
{
|
||||
if (txp) rate_ctx.new.txpkt++;
|
||||
if (txs) rate_ctx.new.txsync++;
|
||||
if (txi) rate_ctx.new.txirq++;
|
||||
if (rxp) rate_ctx.new.rxpkt++;
|
||||
if (rxs) rate_ctx.new.rxsync++;
|
||||
if (rxi) rate_ctx.new.rxirq++;
|
||||
}
|
||||
|
||||
#else /* !RATE */
|
||||
#define IFRATE(x)
|
||||
#endif /* !RATE */
|
||||
|
||||
|
||||
/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
|
||||
#define GENERIC_BUF_SIZE netmap_buf_size /* Size of the mbufs in the Tx pool. */
|
||||
|
||||
/*
|
||||
* Wrapper used by the generic adapter layer to notify
|
||||
* the poller threads. Differently from netmap_rx_irq(), we check
|
||||
* only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
|
||||
*/
|
||||
static void
|
||||
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
|
||||
{
|
||||
if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
|
||||
return;
|
||||
|
||||
netmap_common_irq(ifp, q, work_done);
|
||||
}
|
||||
|
||||
|
||||
/* Enable/disable netmap mode for a generic network interface. */
|
||||
static int
|
||||
generic_netmap_register(struct netmap_adapter *na, int enable)
|
||||
{
|
||||
struct ifnet *ifp = na->ifp;
|
||||
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
|
||||
struct mbuf *m;
|
||||
int error;
|
||||
int i, r;
|
||||
|
||||
if (!na)
|
||||
return EINVAL;
|
||||
|
||||
#ifdef REG_RESET
|
||||
error = ifp->netdev_ops->ndo_stop(ifp);
|
||||
if (error) {
|
||||
return error;
|
||||
}
|
||||
#endif /* REG_RESET */
|
||||
|
||||
if (enable) { /* Enable netmap mode. */
|
||||
/* Init the mitigation support on all the rx queues. */
|
||||
gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
|
||||
M_DEVBUF, M_NOWAIT | M_ZERO);
|
||||
if (!gna->mit) {
|
||||
D("mitigation allocation failed");
|
||||
error = ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
for (r=0; r<na->num_rx_rings; r++)
|
||||
netmap_mitigation_init(&gna->mit[r], r, na);
|
||||
|
||||
/* Initialize the rx queue, as generic_rx_handler() can
|
||||
* be called as soon as netmap_catch_rx() returns.
|
||||
*/
|
||||
for (r=0; r<na->num_rx_rings; r++) {
|
||||
mbq_safe_init(&na->rx_rings[r].rx_queue);
|
||||
}
|
||||
|
||||
/*
|
||||
* Preallocate packet buffers for the tx rings.
|
||||
*/
|
||||
for (r=0; r<na->num_tx_rings; r++)
|
||||
na->tx_rings[r].tx_pool = NULL;
|
||||
for (r=0; r<na->num_tx_rings; r++) {
|
||||
na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
|
||||
M_DEVBUF, M_NOWAIT | M_ZERO);
|
||||
if (!na->tx_rings[r].tx_pool) {
|
||||
D("tx_pool allocation failed");
|
||||
error = ENOMEM;
|
||||
goto free_tx_pools;
|
||||
}
|
||||
for (i=0; i<na->num_tx_desc; i++)
|
||||
na->tx_rings[r].tx_pool[i] = NULL;
|
||||
for (i=0; i<na->num_tx_desc; i++) {
|
||||
m = netmap_get_mbuf(GENERIC_BUF_SIZE);
|
||||
if (!m) {
|
||||
D("tx_pool[%d] allocation failed", i);
|
||||
error = ENOMEM;
|
||||
goto free_tx_pools;
|
||||
}
|
||||
na->tx_rings[r].tx_pool[i] = m;
|
||||
}
|
||||
}
|
||||
rtnl_lock();
|
||||
/* Prepare to intercept incoming traffic. */
|
||||
error = netmap_catch_rx(na, 1);
|
||||
if (error) {
|
||||
D("netdev_rx_handler_register() failed (%d)", error);
|
||||
goto register_handler;
|
||||
}
|
||||
ifp->if_capenable |= IFCAP_NETMAP;
|
||||
|
||||
/* Make netmap control the packet steering. */
|
||||
netmap_catch_tx(gna, 1);
|
||||
|
||||
rtnl_unlock();
|
||||
|
||||
#ifdef RATE_GENERIC
|
||||
if (rate_ctx.refcount == 0) {
|
||||
D("setup_timer()");
|
||||
memset(&rate_ctx, 0, sizeof(rate_ctx));
|
||||
setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
|
||||
if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
|
||||
D("Error: mod_timer()");
|
||||
}
|
||||
}
|
||||
rate_ctx.refcount++;
|
||||
#endif /* RATE */
|
||||
|
||||
} else if (na->tx_rings[0].tx_pool) {
|
||||
/* Disable netmap mode. We enter here only if the previous
|
||||
generic_netmap_register(na, 1) was successfull.
|
||||
If it was not, na->tx_rings[0].tx_pool was set to NULL by the
|
||||
error handling code below. */
|
||||
rtnl_lock();
|
||||
|
||||
ifp->if_capenable &= ~IFCAP_NETMAP;
|
||||
|
||||
/* Release packet steering control. */
|
||||
netmap_catch_tx(gna, 0);
|
||||
|
||||
/* Do not intercept packets on the rx path. */
|
||||
netmap_catch_rx(na, 0);
|
||||
|
||||
rtnl_unlock();
|
||||
|
||||
/* Free the mbufs going to the netmap rings */
|
||||
for (r=0; r<na->num_rx_rings; r++) {
|
||||
mbq_safe_purge(&na->rx_rings[r].rx_queue);
|
||||
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
|
||||
}
|
||||
|
||||
for (r=0; r<na->num_rx_rings; r++)
|
||||
netmap_mitigation_cleanup(&gna->mit[r]);
|
||||
free(gna->mit, M_DEVBUF);
|
||||
|
||||
for (r=0; r<na->num_tx_rings; r++) {
|
||||
for (i=0; i<na->num_tx_desc; i++) {
|
||||
m_freem(na->tx_rings[r].tx_pool[i]);
|
||||
}
|
||||
free(na->tx_rings[r].tx_pool, M_DEVBUF);
|
||||
}
|
||||
|
||||
#ifdef RATE_GENERIC
|
||||
if (--rate_ctx.refcount == 0) {
|
||||
D("del_timer()");
|
||||
del_timer(&rate_ctx.timer);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef REG_RESET
|
||||
error = ifp->netdev_ops->ndo_open(ifp);
|
||||
if (error) {
|
||||
goto free_tx_pools;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
register_handler:
|
||||
rtnl_unlock();
|
||||
free_tx_pools:
|
||||
for (r=0; r<na->num_tx_rings; r++) {
|
||||
if (na->tx_rings[r].tx_pool == NULL)
|
||||
continue;
|
||||
for (i=0; i<na->num_tx_desc; i++)
|
||||
if (na->tx_rings[r].tx_pool[i])
|
||||
m_freem(na->tx_rings[r].tx_pool[i]);
|
||||
free(na->tx_rings[r].tx_pool, M_DEVBUF);
|
||||
na->tx_rings[r].tx_pool = NULL;
|
||||
}
|
||||
for (r=0; r<na->num_rx_rings; r++) {
|
||||
netmap_mitigation_cleanup(&gna->mit[r]);
|
||||
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
|
||||
}
|
||||
free(gna->mit, M_DEVBUF);
|
||||
out:
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback invoked when the device driver frees an mbuf used
|
||||
* by netmap to transmit a packet. This usually happens when
|
||||
* the NIC notifies the driver that transmission is completed.
|
||||
*/
|
||||
static void
|
||||
generic_mbuf_destructor(struct mbuf *m)
|
||||
{
|
||||
if (netmap_verbose)
|
||||
D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
|
||||
netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
|
||||
#ifdef __FreeBSD__
|
||||
m->m_ext.ext_type = EXT_PACKET;
|
||||
m->m_ext.ext_free = NULL;
|
||||
if (*(m->m_ext.ref_cnt) == 0)
|
||||
*(m->m_ext.ref_cnt) = 1;
|
||||
uma_zfree(zone_pack, m);
|
||||
#endif /* __FreeBSD__ */
|
||||
IFRATE(rate_ctx.new.txirq++);
|
||||
}
|
||||
|
||||
/* Record completed transmissions and update hwtail.
 *
 * The oldest tx buffer not yet completed is at nr_hwtail + 1,
 * nr_hwcur is the first unsent buffer.
 */
static u_int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int nm_i = nm_next(kring->nr_hwtail, lim);
	u_int hwcur = kring->nr_hwcur;
	u_int n = 0;
	struct mbuf **tx_pool = kring->tx_pool;

	while (nm_i != hwcur) { /* buffers not completed */
		struct mbuf *m = tx_pool[nm_i];

		if (unlikely(m == NULL)) {
			/* this is done, try to replenish the entry */
			tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
			if (unlikely(m == NULL)) {
				D("mbuf allocation failed, XXX error");
				// XXX how do we proceed ? break ?
				return -ENOMEM;
			}
		} else if (GET_MBUF_REFCNT(m) != 1) {
			break; /* This mbuf is still busy: its refcnt is 2. */
		}
		n++;
		nm_i = nm_next(nm_i, lim);
	}
	kring->nr_hwtail = nm_prev(nm_i, lim);
	ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);

	return n;
}


/*
 * We have pending packets in the driver between nr_hwtail +1 and hwcur.
 * Compute a position in the middle, to be used to generate
 * a notification.
 */
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
	u_int n = kring->nkr_num_slots;
	u_int ntc = nm_next(kring->nr_hwtail, n-1);
	u_int e;

	if (hwcur >= ntc) {
		e = (hwcur + ntc) / 2;
	} else { /* wrap around */
		e = (hwcur + n + ntc) / 2;
		if (e >= n) {
			e -= n;
		}
	}

	if (unlikely(e >= n)) {
		D("This cannot happen");
		e = 0;
	}

	return e;
}

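/* Worked example (illustrative note, not part of the original source):
 * with nkr_num_slots n = 256, nr_hwtail = 250 and hwcur = 10, we get
 * ntc = 251 and take the wrap-around branch: e = (10 + 256 + 251) / 2
 * = 258, then e -= n yields e = 2, roughly halfway through the 15
 * pending slots 251..255, 0..9.
 */
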
/*
 * We have pending packets in the driver between nr_hwtail+1 and hwcur.
 * Schedule a notification approximately in the middle of the two.
 * There is a race but this is only called within txsync which does
 * a double check.
 */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
	struct mbuf *m;
	u_int e;

	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
		return; /* all buffers are free */
	}
	e = generic_tx_event_middle(kring, hwcur);

	m = kring->tx_pool[e];
	if (m == NULL) {
		/* This can happen if there is already an event on the netmap
		   slot 'e': There is nothing to do. */
		return;
	}
	ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
	kring->tx_pool[e] = NULL;
	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);

	// XXX wmb() ?
	/* Decrement the refcount and free it if we have the last one. */
	m_freem(m);
	smp_mb();
}

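/* Illustrative trace (not part of the original source) of the completion
 * machinery formed by generic_set_tx_event(), generic_mbuf_destructor()
 * and generic_netmap_tx_clean(): while a copy of tx_pool[e] sits in the
 * driver queue its refcount is 2. generic_set_tx_event() detaches the
 * mbuf from the pool and installs the destructor, so when the driver
 * completes transmission and drops its reference the destructor runs and
 * netmap_generic_irq() wakes the sender; generic_netmap_tx_clean() then
 * advances nr_hwtail over the NULL entry (replenishing it) and over every
 * mbuf whose refcount has dropped back to 1.
 */
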
/*
 * generic_netmap_txsync() transforms netmap buffers into mbufs
 * and passes them to the standard device driver
 * (ndo_start_xmit() or ifp->if_transmit() ).
 * On linux this is not done directly, but using dev_queue_xmit(),
 * since it implements the TX flow control (and takes some locks).
 */
static int
generic_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */ // j
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	u_int ring_nr = kring->ring_id;

	IFRATE(rate_ctx.new.txsync++);

	// TODO: handle the case of mbuf allocation failure

	rmb();

	/*
	 * First part: process new packets to send.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			void *addr = NMB(slot);

			/* device-specific */
			struct mbuf *m;
			int tx_ret;

			NM_CHECK_ADDR_LEN(addr, len);

			/* Take an mbuf from the tx pool and copy in the user packet. */
			m = kring->tx_pool[nm_i];
			if (unlikely(!m)) {
				RD(5, "This should never happen");
				kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
				if (unlikely(m == NULL)) {
					D("mbuf allocation failed");
					break;
				}
			}
			/* XXX we should ask notifications when NS_REPORT is set,
			 * or roughly every half frame. We can optimize this
			 * by lazily requesting notifications only when a
			 * transmission fails. Probably the best way is to
			 * break on failures and set notifications when
			 * ring->cur == ring->tail || nm_i != cur
			 */
			tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
			if (unlikely(tx_ret)) {
				RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
						tx_ret, nm_i, head, kring->nr_hwtail);
				/*
				 * No room for this mbuf in the device driver.
				 * Request a notification FOR A PREVIOUS MBUF,
				 * then call generic_netmap_tx_clean(kring) to do the
				 * double check and see if we can free more buffers.
				 * If there is space continue, else break;
				 * NOTE: the double check is necessary if the problem
				 * occurs in the txsync call after selrecord().
				 * Also, we need some way to tell the caller that not
				 * all buffers were queued onto the device (this was
				 * not a problem with native netmap driver where space
				 * is preallocated). The bridge has a similar problem
				 * and we solve it there by dropping the excess packets.
				 */
				generic_set_tx_event(kring, nm_i);
				if (generic_netmap_tx_clean(kring)) { /* space now available */
					continue;
				} else {
					break;
				}
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			IFRATE(rate_ctx.new.txpkt ++);
		}

		/* Update hwcur to the next slot to transmit. */
		kring->nr_hwcur = nm_i; /* not head, we could break early */
	}

	/*
	 * Second, reclaim completed buffers
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* No more available slots? Set a notification event
		 * on a netmap slot that will be cleaned in the future.
		 * No doublecheck is performed, since txsync() will be
		 * called twice by netmap_poll().
		 */
		generic_set_tx_event(kring, nm_i);
	}
	ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);

	generic_netmap_tx_clean(kring);

	nm_txsync_finalize(kring);

	return 0;
}


/*
 * This handler is registered (through netmap_catch_rx())
 * within the attached network interface
 * in the RX subsystem, so that every mbuf passed up by
 * the driver can be stolen before it reaches the host network stack.
 * Stolen packets are put in a queue where the
 * generic_netmap_rxsync() callback can extract them.
 */
void
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	u_int work_done;
	u_int rr = MBUF_RXQ(m); // receive ring number

	if (rr >= na->num_rx_rings) {
		rr = rr % na->num_rx_rings; // XXX expensive...
	}

	/* limit the size of the queue */
	if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
		m_freem(m);
	} else {
		mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
	}

	if (netmap_generic_mit < 32768) {
		/* no rx mitigation, pass notification up */
		netmap_generic_irq(na->ifp, rr, &work_done);
		IFRATE(rate_ctx.new.rxirq++);
	} else {
		/* same as send combining, filter notification if there is a
		 * pending timer, otherwise pass it up and start a timer.
		 */
		if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
			/* Record that there is some pending work. */
			gna->mit[rr].mit_pending = 1;
		} else {
			netmap_generic_irq(na->ifp, rr, &work_done);
			IFRATE(rate_ctx.new.rxirq++);
			netmap_mitigation_start(&gna->mit[rr]);
		}
	}
}

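/* Illustrative note (not part of the original source): the 32768 threshold
 * above effectively disables mitigation for small values of the
 * netmap_generic_mit parameter. Assuming the parameter is expressed in
 * nanoseconds, values of 32768 and above arm a per-ring timer so that at
 * most one notification per interval is delivered, with mit_pending
 * recording work that arrived while the timer was running.
 */
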
/*
 * generic_netmap_rxsync() extracts mbufs from the queue filled by
 * generic_rx_handler() and puts their content in the netmap
 * receive ring.
 * Access must be protected because the rx handler is asynchronous.
 */
static int
generic_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */ //j,
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = nm_rxsync_prologue(kring);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	if (head > lim)
		return netmap_ring_reinit(kring);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		/* extract buffers from the rx queue, stop at most one
		 * slot before nr_hwcur (stop_i)
		 */
		uint16_t slot_flags = kring->nkr_slot_flags;
		u_int stop_i = nm_prev(kring->nr_hwcur, lim);

		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
		for (n = 0; nm_i != stop_i; n++) {
			int len;
			void *addr = NMB(&ring->slot[nm_i]);
			struct mbuf *m;

			/* we only check the address here on generic rx rings */
			if (addr == netmap_buffer_base) { /* Bad buffer */
				return netmap_ring_reinit(kring);
			}
			/*
			 * Call the locked version of the function.
			 * XXX Ideally we could grab a batch of mbufs at once
			 * and save some locking overhead.
			 */
			m = mbq_safe_dequeue(&kring->rx_queue);
			if (!m)	/* no more data */
				break;
			len = MBUF_LEN(m);
			m_copydata(m, 0, len, addr);
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			m_freem(m);
			nm_i = nm_next(nm_i, lim);
		}
		if (n) {
			kring->nr_hwtail = nm_i;
			IFRATE(rate_ctx.new.rxpkt += n);
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	// XXX should we invert the order ?
	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* Userspace has released some packets. */
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];

			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}
	/* tell userspace that there might be new packets. */
	nm_rxsync_finalize(kring);
	IFRATE(rate_ctx.new.rxsync++);

	return 0;
}

static void
generic_netmap_dtor(struct netmap_adapter *na)
{
	struct ifnet *ifp = na->ifp;
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
	struct netmap_adapter *prev_na = gna->prev;

	if (prev_na != NULL) {
		D("Released generic NA %p", gna);
		if_rele(na->ifp);
		netmap_adapter_put(prev_na);
	}
	if (ifp != NULL) {
		WNA(ifp) = prev_na;
		D("Restored native NA %p", prev_na);
		na->ifp = NULL;
	}
}

/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is less performant than native support but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
 */
int
generic_netmap_attach(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	struct netmap_generic_adapter *gna;
	int retval;
	u_int num_tx_desc, num_rx_desc;

	num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */

	generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
	ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
	if (num_tx_desc == 0 || num_rx_desc == 0) {
		D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
		return EINVAL;
	}

	gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (gna == NULL) {
		D("no memory on attach, give up");
		return ENOMEM;
	}
	na = (struct netmap_adapter *)gna;
	na->ifp = ifp;
	na->num_tx_desc = num_tx_desc;
	na->num_rx_desc = num_rx_desc;
	na->nm_register = &generic_netmap_register;
	na->nm_txsync = &generic_netmap_txsync;
	na->nm_rxsync = &generic_netmap_rxsync;
	na->nm_dtor = &generic_netmap_dtor;
	/* when using generic, IFCAP_NETMAP is set so we force
	 * NAF_SKIP_INTR to use the regular interrupt handler
	 */
	na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;

	ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
			ifp->num_tx_queues, ifp->real_num_tx_queues,
			ifp->tx_queue_len);
	ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
			ifp->num_rx_queues, ifp->real_num_rx_queues);

	generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);

	retval = netmap_attach_common(na);
	if (retval) {
		free(gna, M_DEVBUF);
	}

	return retval;
}
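
/* Illustrative sketch (not part of the original source): how a fallback
 * path might use the attach routine above for a NIC whose driver never
 * called the native attach. nm_has_native_support() is a made-up
 * placeholder; generic_netmap_attach() is the function defined above.
 */
#if 0
static int
nm_attach_any(struct ifnet *ifp)
{
	if (nm_has_native_support(ifp))	/* hypothetical check */
		return 0;	/* native adapter already registered */
	return generic_netmap_attach(ifp);	/* fall back to emulated mode */
}
#endif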
1396
netmap/sys/dev/netmap/netmap_kern.h
Normal file
File diff suppressed because it is too large
163
netmap/sys/dev/netmap/netmap_mbq.c
Normal file
@ -0,0 +1,163 @@
/*
 * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 */


#ifdef linux
#include "bsd_glue.h"
#else	/* __FreeBSD__ */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#endif	/* __FreeBSD__ */

#include "netmap_mbq.h"


static inline void __mbq_init(struct mbq *q)
{
	q->head = q->tail = NULL;
	q->count = 0;
}


void mbq_safe_init(struct mbq *q)
{
	mtx_init(&q->lock, "mbq", NULL, MTX_SPIN);
	__mbq_init(q);
}


void mbq_init(struct mbq *q)
{
	__mbq_init(q);
}


static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m)
{
	m->m_nextpkt = NULL;
	if (q->tail) {
		q->tail->m_nextpkt = m;
		q->tail = m;
	} else {
		q->head = q->tail = m;
	}
	q->count++;
}


void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
{
	mtx_lock(&q->lock);
	__mbq_enqueue(q, m);
	mtx_unlock(&q->lock);
}


void mbq_enqueue(struct mbq *q, struct mbuf *m)
{
	__mbq_enqueue(q, m);
}


static inline struct mbuf *__mbq_dequeue(struct mbq *q)
{
	struct mbuf *ret = NULL;

	if (q->head) {
		ret = q->head;
		q->head = ret->m_nextpkt;
		if (q->head == NULL) {
			q->tail = NULL;
		}
		q->count--;
		ret->m_nextpkt = NULL;
	}

	return ret;
}


struct mbuf *mbq_safe_dequeue(struct mbq *q)
{
	struct mbuf *ret;

	mtx_lock(&q->lock);
	ret = __mbq_dequeue(q);
	mtx_unlock(&q->lock);

	return ret;
}


struct mbuf *mbq_dequeue(struct mbq *q)
{
	return __mbq_dequeue(q);
}


/* XXX seems pointless to have a generic purge */
static void __mbq_purge(struct mbq *q, int safe)
{
	struct mbuf *m;

	for (;;) {
		m = safe ? mbq_safe_dequeue(q) : mbq_dequeue(q);
		if (m) {
			m_freem(m);
		} else {
			break;
		}
	}
}


void mbq_purge(struct mbq *q)
{
	__mbq_purge(q, 0);
}


void mbq_safe_purge(struct mbq *q)
{
	__mbq_purge(q, 1);
}


void mbq_safe_destroy(struct mbq *q)
{
	mtx_destroy(&q->lock);
}


void mbq_destroy(struct mbq *q)
{
	/* nothing to do: the unlocked queue owns no resources to release */
}
78
netmap/sys/dev/netmap/netmap_mbq.h
Normal file
@ -0,0 +1,78 @@
/*
 * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 */


#ifndef __NETMAP_MBQ_H__
#define __NETMAP_MBQ_H__

/*
 * These functions implement an mbuf tailq with an optional lock.
 * The base functions act ONLY ON THE QUEUE, whereas the "safe"
 * variants (mbq_safe_*) also handle the lock.
 */

/* XXX probably rely on a previous definition of SPINLOCK_T */
#ifdef linux
#define SPINLOCK_T	safe_spinlock_t
#else
#define SPINLOCK_T	struct mtx
#endif

/* A FIFO queue of mbufs with an optional lock. */
struct mbq {
	struct mbuf *head;
	struct mbuf *tail;
	int count;
	SPINLOCK_T lock;
};

/* XXX "destroy" does not match "init" as a name.
 * We should also clarify whether init can be used while
 * holding a lock, and whether mbq_safe_destroy() is a NOP.
 */
void mbq_init(struct mbq *q);
void mbq_destroy(struct mbq *q);
void mbq_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_dequeue(struct mbq *q);
void mbq_purge(struct mbq *q);

/* XXX missing mbq_lock() and mbq_unlock */

void mbq_safe_init(struct mbq *q);
void mbq_safe_destroy(struct mbq *q);
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_safe_dequeue(struct mbq *q);
void mbq_safe_purge(struct mbq *q);

static inline unsigned int mbq_len(struct mbq *q)
{
	return q->count;
}

#endif /* __NETMAP_MBQ_H__ */
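
/* Illustrative sketch (not part of the original source): how producer and
 * consumer sides might use the "safe" mbq API declared above. The names
 * 'q' and 'pkt' are hypothetical; every mbq_* call is one declared in this
 * header, and m_freem() is the standard mbuf release routine.
 */
#if 0
static void mbq_usage_example(struct mbuf *pkt)
{
	struct mbq q;

	mbq_safe_init(&q);		/* init the queue and its spinlock */
	mbq_safe_enqueue(&q, pkt);	/* producer side (e.g. an rx handler) */
	pkt = mbq_safe_dequeue(&q);	/* consumer side (e.g. rxsync) */
	if (pkt)
		m_freem(pkt);
	mbq_safe_purge(&q);		/* drop anything still queued */
	mbq_safe_destroy(&q);		/* releases the lock */
}
#endif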
1377
netmap/sys/dev/netmap/netmap_mem2.c
Normal file
File diff suppressed because it is too large
227
netmap/sys/dev/netmap/netmap_mem2.h
Normal file
@ -0,0 +1,227 @@
/*
 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: head/sys/dev/netmap/netmap_mem2.c 234290 2012-04-14 16:44:18Z luigi $
 *
 * (New) memory allocator for netmap
 */

/*
 * This allocator creates three memory pools:
 *	nm_if_pool	for the struct netmap_if
 *	nm_ring_pool	for the struct netmap_ring
 *	nm_buf_pool	for the packet buffers.
 *
 * that contain netmap objects. Each pool is made of a number of clusters,
 * multiple of a page size, each containing an integer number of objects.
 * The clusters are contiguous in user space but not in the kernel.
 * Only nm_buf_pool needs to be dma-able,
 * but for convenience use the same type of allocator for all.
 *
 * Once mapped, the three pools are exported to userspace
 * as a contiguous block, starting from nm_if_pool. Each
 * cluster (and pool) is an integral number of pages.
 *   [ . . . ][ . . . . . .][ . . . . . . . . . .]
 *    nm_if     nm_ring            nm_buf
 *
 * The userspace areas contain offsets of the objects in userspace.
 * When (at init time) we write these offsets, we find out the index
 * of the object, and from there locate the offset from the beginning
 * of the region.
 *
 * The individual allocators manage a pool of memory for objects of
 * the same size.
 * The pool is split into smaller clusters, whose size is a
 * multiple of the page size. The cluster size is chosen
 * to minimize the waste for a given max cluster size
 * (we do it by brute force, as we have relatively few objects
 * per cluster).
 *
 * Objects are aligned to the cache line (64 bytes) rounding up object
 * sizes when needed. A bitmap contains the state of each object.
 * Allocation scans the bitmap; this is done only on attach, so we are not
 * too worried about performance.
 *
 * For each allocator we can define (through sysctl) the size and
 * number of each object. Memory is allocated at the first use of a
 * netmap file descriptor, and can be freed when all such descriptors
 * have been released (including unmapping the memory).
 * If memory is scarce, the system tries to get as much as possible
 * and the sysctl values reflect the actual allocation.
 * Together with the desired values, the sysctls also export absolute
 * min and max values that cannot be overridden.
 *
 * struct netmap_if:
 *	variable size, max 16 bytes per ring pair plus some fixed amount.
 *	1024 bytes should be large enough in practice.
 *
 *	In the worst case we have one netmap_if per ring in the system.
 *
 * struct netmap_ring
 *	variable size, 8 byte per slot plus some fixed amount.
 *	Rings can be large (e.g. 4k slots, or >32Kbytes).
 *	We default to 36 KB (9 pages), and a few hundred rings.
 *
 * struct netmap_buffer
 *	The more the better, both because fast interfaces tend to have
 *	many slots, and because we may want to use buffers to store
 *	packets in userspace avoiding copies.
 *	Must contain a full frame (eg 1518, or more for vlans, jumbo
 *	frames etc.) plus be nicely aligned, plus some NICs restrict
 *	the size to multiple of 1K or so. Default to 2K
 */
#ifndef _NET_NETMAP_MEM2_H_
#define _NET_NETMAP_MEM2_H_


#define NETMAP_BUF_MAX_NUM	20*4096*2	/* large machine */

#define NETMAP_POOL_MAX_NAMSZ	32


enum {
	NETMAP_IF_POOL = 0,
	NETMAP_RING_POOL,
	NETMAP_BUF_POOL,
	NETMAP_POOLS_NR
};


struct netmap_obj_params {
	u_int size;
	u_int num;
};
struct netmap_obj_pool {
	char name[NETMAP_POOL_MAX_NAMSZ];	/* name of the allocator */

	/* ---------------------------------------------------*/
	/* these are only meaningful if the pool is finalized */
	/* (see 'finalized' field in netmap_mem_d)            */
	u_int objtotal;		/* actual total number of objects. */
	u_int memtotal;		/* actual total memory space */
	u_int numclusters;	/* actual number of clusters */

	u_int objfree;		/* number of free objects. */

	struct lut_entry *lut;	/* virt,phys addresses, objtotal entries */
	uint32_t *bitmap;	/* one bit per buffer, 1 means free */
	uint32_t bitmap_slots;	/* number of uint32 entries in bitmap */
	/* ---------------------------------------------------*/

	/* limits */
	u_int objminsize;	/* minimum object size */
	u_int objmaxsize;	/* maximum object size */
	u_int nummin;		/* minimum number of objects */
	u_int nummax;		/* maximum number of objects */

	/* these are changed only by config */
	u_int _objtotal;	/* total number of objects */
	u_int _objsize;		/* object size */
	u_int _clustsize;	/* cluster size */
	u_int _clustentries;	/* objects per cluster */
	u_int _numclusters;	/* number of clusters */

	/* requested values */
	u_int r_objtotal;
	u_int r_objsize;
};

#ifdef linux
// XXX a mtx would suffice here 20130415 lr
#define NMA_LOCK_T		struct semaphore
#else /* !linux */
#define NMA_LOCK_T		struct mtx
#endif /* linux */

typedef int (*netmap_mem_config_t)(struct netmap_mem_d*);
typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*);
typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*);

typedef uint16_t nm_memid_t;

/* We implement two kinds of netmap_mem_d structures:
 *
 * - global: used by hardware NICs;
 *
 * - private: used by VALE ports.
 *
 * In both cases, the netmap_mem_d structure has the same lifetime as the
 * netmap_adapter of the corresponding NIC or port. It is the responsibility of
 * the client code to delete the private allocator when the associated
 * netmap_adapter is freed (this is implemented by the NAF_MEM_OWNER flag in
 * netmap.c). The 'refcount' field counts the number of active users of the
 * structure. The global allocator uses this information to prevent/allow
 * reconfiguration. The private allocators release all their memory when there
 * are no active users. By 'active user' we mean an existing netmap_priv
 * structure holding a reference to the allocator.
 */
struct netmap_mem_d {
	NMA_LOCK_T nm_mtx;	/* protect the allocator */
	u_int nm_totalsize;	/* shorthand */

	u_int flags;
#define NETMAP_MEM_FINALIZED	0x1	/* preallocation done */
#define NETMAP_MEM_PRIVATE	0x2	/* uses private address space */
	int lasterr;		/* last error for curr config */
	int refcount;		/* existing priv structures */
	/* the three allocators */
	struct netmap_obj_pool pools[NETMAP_POOLS_NR];

	netmap_mem_config_t   config;
	netmap_mem_finalize_t finalize;
	netmap_mem_deref_t    deref;

	nm_memid_t nm_id;	/* allocator identifier */

	/* list of all existing allocators, sorted by nm_id */
	struct netmap_mem_d *prev, *next;
};

extern struct netmap_mem_d nm_mem;

vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
int	   netmap_mem_finalize(struct netmap_mem_d *);
int	   netmap_mem_init(void);
void	   netmap_mem_fini(void);
struct netmap_if *
	   netmap_mem_if_new(const char *, struct netmap_adapter *);
void	   netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int	   netmap_mem_rings_create(struct netmap_adapter *);
void	   netmap_mem_rings_delete(struct netmap_adapter *);
void	   netmap_mem_deref(struct netmap_mem_d *);
int	   netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
ssize_t    netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new(const char *name,
	u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes,
	int *error);
void	   netmap_mem_private_delete(struct netmap_mem_d *);

#define NETMAP_BDG_BUF_SIZE(n)	((n)->pools[NETMAP_BUF_POOL]._objsize)

uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);


#endif /* _NET_NETMAP_MEM2_H_ */
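
/* Illustrative sketch (not part of the original source): walking the three
 * pool descriptors of an allocator 'nmd' using only the fields declared
 * above. The D() logging macro is assumed to come from netmap_kern.h.
 */
#if 0
static void netmap_mem_dump_pools(struct netmap_mem_d *nmd)
{
	int i;

	for (i = 0; i < NETMAP_POOLS_NR; i++) {
		struct netmap_obj_pool *p = &nmd->pools[i];

		/* configured geometry: objects, object size, clusters */
		D("%s: %u objects of %u bytes in %u clusters",
			p->name, p->_objtotal, p->_objsize, p->_numclusters);
	}
}
#endif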
401
netmap/sys/dev/netmap/netmap_offloadings.c
Normal file
@ -0,0 +1,401 @@
/*
 * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD: head/sys/dev/netmap/netmap_offloadings.c 261909 2014-02-15 04:53:04Z luigi $ */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/socket.h>	/* sockaddrs */
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>

#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>



/* This routine is called by bdg_mismatch_datapath() when it finishes
 * accumulating bytes for a segment, in order to fix some fields in the
 * segment headers (which still contain the same content as the header
 * of the original GSO packet). 'buf' points to the beginning (e.g.
 * the ethernet header) of the segment, and 'len' is its length.
 */
static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
			    u_int segmented_bytes, u_int last_segment,
			    u_int tcp, u_int iphlen)
{
	struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
	uint16_t *check = NULL;
	uint8_t *check_data = NULL;

	if (iphlen == 20) {
		/* Set the IPv4 "Total Length" field. */
		iph->tot_len = htobe16(len-14);
		ND("ip total length %u", be16toh(iph->tot_len));

		/* Set the IPv4 "Identification" field. */
		iph->id = htobe16(be16toh(iph->id) + idx);
		ND("ip identification %u", be16toh(iph->id));

		/* Compute and insert the IPv4 header checksum. */
		iph->check = 0;
		iph->check = nm_csum_ipv4(iph);
		ND("IP csum %x", be16toh(iph->check));
	} else {/* if (iphlen == 40) */
		/* Set the IPv6 "Payload Len" field. */
		ip6h->payload_len = htobe16(len-14-iphlen);
	}

	if (tcp) {
		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);

		/* Set the TCP sequence number. */
		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
		ND("tcp seq %u", be32toh(tcph->seq));

		/* Zero the PSH and FIN TCP flags if this is not the last
		   segment. */
		if (!last_segment)
			tcph->flags &= ~(0x8 | 0x1);
		ND("last_segment %u", last_segment);

		check = &tcph->check;
		check_data = (uint8_t *)tcph;
	} else { /* UDP */
		struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);

		/* Set the UDP 'Length' field. */
		udph->len = htobe16(len-14-iphlen);

		check = &udph->check;
		check_data = (uint8_t *)udph;
	}

	/* Compute and insert TCP/UDP checksum. */
	*check = 0;
	if (iphlen == 20)
		nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
	else
		nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);

	ND("TCP/UDP csum %x", be16toh(*check));
}


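/* Worked example (illustrative, not part of the original source): for a
 * TCP/IPv4 GSO packet with a 14+20+20 = 54 byte header, 2900 payload bytes
 * and a destination mfs of 1514, the segmentation loop below emits two
 * segments of 1514 and 1494 bytes (1460 and 1440 payload bytes).
 * gso_fix_segment() then rewrites each one, bumping the IPv4 id by the
 * segment index, the TCP sequence number by 0 and 1460 respectively, and
 * recomputing both checksums.
 */
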
/* The VALE mismatch datapath implementation. */
void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
			   struct netmap_vp_adapter *dst_na,
			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
			   u_int *j, u_int lim, u_int *howmany)
{
	struct netmap_slot *slot = NULL;
	struct nm_vnet_hdr *vh = NULL;
	/* Number of source slots to process. */
	u_int frags = ft_p->ft_frags;
	struct nm_bdg_fwd *ft_end = ft_p + frags;

	/* Source and destination pointers. */
	uint8_t *dst, *src;
	size_t src_len, dst_len;

	u_int j_start = *j;
	u_int dst_slots = 0;

	/* If the source port uses the offloadings, while destination doesn't,
	 * we grab the source virtio-net header and do the offloadings here.
	 */
	if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
		vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
	}

	/* Init source and dest pointers. */
	src = ft_p->ft_buf;
	src_len = ft_p->ft_len;
	slot = &ring->slot[*j];
	dst = BDG_NMB(&dst_na->up, slot);
	dst_len = src_len;

	/* We are processing the first input slot and there is a mismatch
	 * between source and destination virt_hdr_len (SHL and DHL).
	 * When a client is using virtio-net headers, the header length
	 * can be:
	 *    - 10: the header corresponds to the struct nm_vnet_hdr
	 *    - 12: the first 10 bytes correspond to the struct
	 *          virtio_net_hdr, and the last 2 bytes store the
	 *          "mergeable buffers" info, which is an optional
	 *          hint that can be zeroed for compatibility
	 *
	 * The destination header is therefore built according to the
	 * following table:
	 *
	 * SHL | DHL | destination header
	 * -----------------------------
	 *   0 |  10 | zero
	 *   0 |  12 | zero
	 *  10 |   0 | doesn't exist
	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
	 *  12 |   0 | doesn't exist
	 *  12 |  10 | copied from the first 10 bytes of source header
	 */
	bzero(dst, dst_na->virt_hdr_len);
	if (na->virt_hdr_len && dst_na->virt_hdr_len)
		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
	/* Skip the virtio-net headers. */
	src += na->virt_hdr_len;
	src_len -= na->virt_hdr_len;
	dst += dst_na->virt_hdr_len;
	dst_len = dst_na->virt_hdr_len + src_len;

	/* Here it could be dst_len == 0 (which implies src_len == 0),
	 * so we avoid passing a zero length fragment.
	 */
	if (dst_len == 0) {
		ft_p++;
		src = ft_p->ft_buf;
		src_len = ft_p->ft_len;
		dst_len = src_len;
	}

	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		u_int gso_bytes = 0;
		/* Length of the GSO packet header. */
		u_int gso_hdr_len = 0;
		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
		uint8_t *gso_hdr = NULL;
		/* Index of the current segment. */
		u_int gso_idx = 0;
		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
		u_int segmented_bytes = 0;
		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
		u_int iphlen = 0;
		/* Is this a TCP or a UDP GSO packet? */
		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;

		/* Segment the GSO packet contained in the input slots (frags). */
		while (ft_p != ft_end) {
			size_t copy;

			/* Grab the GSO header if we don't have it. */
			if (!gso_hdr) {
				uint16_t ethertype;

				gso_hdr = src;

				/* Look at the 'Ethertype' field to see if this packet
				 * is IPv4 or IPv6.
				 */
				ethertype = be16toh(*((uint16_t *)(gso_hdr + 12)));
				if (ethertype == 0x0800)
					iphlen = 20;
				else /* if (ethertype == 0x86DD) */
					iphlen = 40;
				ND(3, "type=%04x", ethertype);

				/* Compute gso_hdr_len. For TCP we need to read the
				 * content of the 'Data Offset' field.
				 */
				if (tcp) {
					struct nm_tcphdr *tcph =
						(struct nm_tcphdr *)&gso_hdr[14+iphlen];

					gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
				} else
					gso_hdr_len = 14 + iphlen + 8; /* UDP */

				ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
								dst_na->mfs);

				/* Advance source pointers. */
				src += gso_hdr_len;
				src_len -= gso_hdr_len;
				if (src_len == 0) {
					ft_p++;
					if (ft_p == ft_end)
						break;
					src = ft_p->ft_buf;
					src_len = ft_p->ft_len;
					continue;
				}
			}

			/* Fill in the header of the current segment. */
			if (gso_bytes == 0) {
				memcpy(dst, gso_hdr, gso_hdr_len);
				gso_bytes = gso_hdr_len;
			}

			/* Fill in data and update source and dest pointers. */
			copy = src_len;
			if (gso_bytes + copy > dst_na->mfs)
				copy = dst_na->mfs - gso_bytes;
			memcpy(dst + gso_bytes, src, copy);
			gso_bytes += copy;
			src += copy;
			src_len -= copy;

			/* A segment is complete or we have processed all the
			   GSO payload bytes. */
			if (gso_bytes >= dst_na->mfs ||
				(src_len == 0 && ft_p + 1 == ft_end)) {
				/* After raw segmentation, we must fix some header
				 * fields and compute checksums, in a protocol dependent
				 * way. */
				gso_fix_segment(dst, gso_bytes, gso_idx,
						segmented_bytes,
						src_len == 0 && ft_p + 1 == ft_end,
						tcp, iphlen);

				ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
				slot->len = gso_bytes;
				slot->flags = 0;
				segmented_bytes += gso_bytes - gso_hdr_len;

				dst_slots++;

				/* Next destination slot. */
				*j = nm_next(*j, lim);
				slot = &ring->slot[*j];
				dst = BDG_NMB(&dst_na->up, slot);

				gso_bytes = 0;
				gso_idx++;
			}

			/* Next input slot. */
			if (src_len == 0) {
				ft_p++;
				if (ft_p == ft_end)
					break;
				src = ft_p->ft_buf;
				src_len = ft_p->ft_len;
			}
		}
		ND(3, "%d bytes segmented", segmented_bytes);

	} else {
		/* Address of a checksum field into a destination slot. */
		uint16_t *check = NULL;
		/* Accumulator for an unfolded checksum. */
		rawsum_t csum = 0;

		/* Process a non-GSO packet. */

		/* Init 'check' if necessary. */
		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
				D("invalid checksum request");
			else
				check = (uint16_t *)(dst + vh->csum_start +
						vh->csum_offset);
		}

		while (ft_p != ft_end) {
			/* Init/update the packet checksum if needed. */
			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
				if (!dst_slots)
					csum = nm_csum_raw(src + vh->csum_start,
								src_len - vh->csum_start, 0);
				else
					csum = nm_csum_raw(src, src_len, csum);
			}

			/* Round to a multiple of 64 */
			src_len = (src_len + 63) & ~63;

			if (ft_p->ft_flags & NS_INDIRECT) {
				if (copyin(src, dst, src_len)) {
					/* Invalid user pointer, pretend len is 0. */
					dst_len = 0;
				}
			} else {
				memcpy(dst, src, (int)src_len);
			}
			slot->len = dst_len;

			dst_slots++;

			/* Next destination slot. */
			*j = nm_next(*j, lim);
			slot = &ring->slot[*j];
			dst = BDG_NMB(&dst_na->up, slot);

			/* Next source slot. */
			ft_p++;
			src = ft_p->ft_buf;
			dst_len = src_len = ft_p->ft_len;

		}

		/* Finalize (fold) the checksum if needed. */
		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
			*check = nm_csum_fold(csum);
		}
		ND(3, "using %u dst_slots", dst_slots);

		/* A second pass on the destination slots to set the slot flags,
		 * using the right number of destination slots.
		 */
		while (j_start != *j) {
			slot = &ring->slot[j_start];
			slot->flags = (dst_slots << 8)| NS_MOREFRAG;
			j_start = nm_next(j_start, lim);
		}
		/* Clear NS_MOREFRAG flag on last entry. */
		slot->flags = (dst_slots << 8);
	}

	/* Update howmany. */
	if (unlikely(dst_slots > *howmany)) {
		dst_slots = *howmany;
		D("Slot allocation error: Should never happen");
	}
	*howmany -= dst_slots;
}
708
netmap/sys/dev/netmap/netmap_pipe.c
Normal file
@ -0,0 +1,708 @@
/*
 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/socket.h>	/* sockaddrs */
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/refcount.h>


#elif defined(linux)

#include "bsd_glue.h"

#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */

#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#ifdef WITH_PIPES

#define NM_PIPE_MAXSLOTS	4096

int netmap_default_pipes = 0; /* default number of pipes for each nic */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0, "");

/* allocate the pipe array in the parent adapter */
int
netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr)
{
	size_t len;
	int mode = nmr->nr_flags & NR_REG_MASK;
	u_int npipes;

	if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) {
		/* this is for our parent, not for us */
		return 0;
	}

	/* TODO: we can resize the array if the new
	 * request can accommodate the already existing pipes
	 */
	if (na->na_pipes) {
		nmr->nr_arg1 = na->na_max_pipes;
		return 0;
	}

	npipes = nmr->nr_arg1;
	if (npipes == 0)
		npipes = netmap_default_pipes;
	nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL);

	if (npipes == 0) {
		/* really zero, nothing to alloc */
		goto out;
	}

	len = sizeof(struct netmap_pipe_adapter *) * npipes;
	na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (na->na_pipes == NULL)
		return ENOMEM;

	na->na_max_pipes = npipes;
	na->na_next_pipe = 0;

out:
	nmr->nr_arg1 = npipes;

	return 0;
}

/* deallocate the parent array in the parent adapter */
void
netmap_pipe_dealloc(struct netmap_adapter *na)
{
	if (na->na_pipes) {
		ND("freeing pipes for %s", NM_IFPNAME(na->ifp));
		free(na->na_pipes, M_DEVBUF);
		na->na_pipes = NULL;
		na->na_max_pipes = 0;
		na->na_next_pipe = 0;
	}
}

/* find a pipe endpoint with the given id among the parent's pipes */
static struct netmap_pipe_adapter *
netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id)
{
	int i;
	struct netmap_pipe_adapter *na;

	for (i = 0; i < parent->na_next_pipe; i++) {
		na = parent->na_pipes[i];
		if (na->id == pipe_id) {
			return na;
		}
	}
	return NULL;
}

/* add a new pipe endpoint to the parent array */
static int
netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
	if (parent->na_next_pipe >= parent->na_max_pipes) {
		D("%s: no space left for pipes", NM_IFPNAME(parent->ifp));
		return ENOMEM;
	}

	parent->na_pipes[parent->na_next_pipe] = na;
	na->parent_slot = parent->na_next_pipe;
	parent->na_next_pipe++;
	return 0;
}

/* remove the given pipe endpoint from the parent array */
static void
netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
	u_int n;
	n = --parent->na_next_pipe;
	if (n != na->parent_slot) {
		parent->na_pipes[na->parent_slot] =
			parent->na_pipes[n];
	}
	parent->na_pipes[n] = NULL;
}

static int
netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
{
	struct netmap_kring *rxkring = txkring->pipe;
	u_int limit; /* slots to transfer */
	u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
		lim_rx = rxkring->nkr_num_slots - 1;
	int m, busy;

	ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
	ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
		txkring->rcur, txkring->rhead, txkring->rtail);

	j = rxkring->nr_hwtail; /* RX */
	k = txkring->nr_hwcur;  /* TX */
	m = txkring->rhead - txkring->nr_hwcur; /* new slots */
	if (m < 0)
		m += txkring->nkr_num_slots;
	limit = m;
	m = rxkring->nkr_num_slots - 1; /* max avail space on destination */
	busy = j - rxkring->nr_hwcur; /* busy slots */
	if (busy < 0)
		busy += txkring->nkr_num_slots;
	m -= busy; /* subtract busy slots */
	ND(2, "m %d limit %d", m, limit);
	if (m < limit)
		limit = m;

	if (limit == 0) {
		/* either the rxring is full, or nothing to send */
		nm_txsync_finalize(txkring); /* actually useless */
		return 0;
	}

	while (limit-- > 0) {
		struct netmap_slot *rs = &rxkring->save_ring->slot[j];
		struct netmap_slot *ts = &txkring->ring->slot[k];
		struct netmap_slot tmp;

		/* swap the slots */
		tmp = *rs;
		*rs = *ts;
		*ts = tmp;

		/* no need to report the buffer change */

		j = nm_next(j, lim_rx);
		k = nm_next(k, lim_tx);
	}

	wmb(); /* make sure the slots are updated before publishing them */
	rxkring->nr_hwtail = j;
	txkring->nr_hwcur = k;
	txkring->nr_hwtail = nm_prev(k, lim_tx);

	nm_txsync_finalize(txkring);
	ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
		txkring->rcur, txkring->rhead, txkring->rtail, j);

	wmb(); /* make sure rxkring->nr_hwtail is updated before notifying */
	rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0);

	return 0;
}

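/* Worked example (illustrative, not part of the original source): with 64
 * slots per ring (lim 63), txkring->nr_hwcur = 60 and rhead = 4 there are
 * (4 - 60 + 64) = 8 new slots to push; if rxkring->nr_hwtail = 20 and
 * rxkring->nr_hwcur = 10, the destination has 63 - (20 - 10) = 53 free
 * slots, so limit stays 8 and the loop above swaps tx slots 60..63, 0..3
 * with rx slots 20..27, leaving rx hwtail at 28 and tx hwcur at 4.
 */
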
static int
netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
{
	struct netmap_kring *txkring = rxkring->pipe;
	uint32_t oldhwcur = rxkring->nr_hwcur;

	ND("%s %x <- %s", rxkring->name, flags, txkring->name);
	rxkring->nr_hwcur = rxkring->rhead; /* recover user-released slots */
	ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
		rxkring->rcur, rxkring->rhead, rxkring->rtail);
	rmb(); /* paired with the first wmb() in txsync */
	nm_rxsync_finalize(rxkring);

	if (oldhwcur != rxkring->nr_hwcur) {
		/* we have released some slots, notify the other end */
		wmb(); /* make sure nr_hwcur is updated before notifying */
		txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0);
	}
	return 0;
}

/* Pipe endpoints are created and destroyed together, so that endpoints do not
 * have to check for the existence of their peer at each ?xsync.
 *
 * To play well with the existing netmap infrastructure (refcounts etc.), we
 * adopt the following strategy:
 *
 * 1) The first endpoint that is created also creates the other endpoint and
 * grabs a reference to it.
 *
 *    state A)  user1 --> endpoint1 --> endpoint2
 *
 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
 * its reference to the user:
 *
 *    state B)  user1 --> endpoint1     endpoint2 <--- user2
 *
 * 3) Assume that, starting from state B, endpoint2 is closed. In the unregister
 * callback endpoint2 notes that endpoint1 is still active and adds a reference
 * from endpoint1 to itself. When user2 then releases her own reference,
 * endpoint2 is not destroyed and we are back to state A. A symmetrical state
 * would be reached if endpoint1 were released instead.
 *
 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
 * it owns a reference to endpoint2 and releases it.
 *
 * Something similar goes on for the creation and destruction of the krings.
 */


/* netmap_pipe_krings_create.
 *
 * There are two cases:
 *
 * 1) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e1. We have to create both sets
 *    of krings.
 *
 * 2) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e2. e1 is certainly registered and our
 *    krings already exist, but they may be hidden.
 */
static int
|
||||
netmap_pipe_krings_create(struct netmap_adapter *na)
|
||||
{
|
||||
struct netmap_pipe_adapter *pna =
|
||||
(struct netmap_pipe_adapter *)na;
|
||||
struct netmap_adapter *ona = &pna->peer->up;
|
||||
int error = 0;
|
||||
if (pna->peer_ref) {
|
||||
int i;
|
||||
|
||||
/* case 1) above */
|
||||
D("%p: case 1, create everything", na);
|
||||
error = netmap_krings_create(na, 0);
|
||||
if (error)
|
||||
goto err;
|
||||
|
||||
/* we also create all the rings, since we need to
|
||||
* update the save_ring pointers.
|
||||
* netmap_mem_rings_create (called by our caller)
|
||||
* will not create the rings again
|
||||
*/
|
||||
|
||||
error = netmap_mem_rings_create(na);
|
||||
if (error)
|
||||
goto del_krings1;
|
||||
|
||||
/* update our hidden ring pointers */
|
||||
for (i = 0; i < na->num_tx_rings + 1; i++)
|
||||
na->tx_rings[i].save_ring = na->tx_rings[i].ring;
|
||||
for (i = 0; i < na->num_rx_rings + 1; i++)
|
||||
na->rx_rings[i].save_ring = na->rx_rings[i].ring;
|
||||
|
||||
/* now, create krings and rings of the other end */
|
||||
error = netmap_krings_create(ona, 0);
|
||||
if (error)
|
||||
goto del_rings1;
|
||||
|
||||
error = netmap_mem_rings_create(ona);
|
||||
if (error)
|
||||
goto del_krings2;
|
||||
|
||||
for (i = 0; i < ona->num_tx_rings + 1; i++)
|
||||
ona->tx_rings[i].save_ring = ona->tx_rings[i].ring;
|
||||
for (i = 0; i < ona->num_rx_rings + 1; i++)
|
||||
ona->rx_rings[i].save_ring = ona->rx_rings[i].ring;
|
||||
|
||||
/* cross link the krings */
|
||||
for (i = 0; i < na->num_tx_rings; i++) {
|
||||
na->tx_rings[i].pipe = pna->peer->up.rx_rings + i;
|
||||
na->rx_rings[i].pipe = pna->peer->up.tx_rings + i;
|
||||
pna->peer->up.tx_rings[i].pipe = na->rx_rings + i;
|
||||
pna->peer->up.rx_rings[i].pipe = na->tx_rings + i;
|
||||
}
|
||||
} else {
|
||||
int i;
|
||||
/* case 2) above */
|
||||
/* recover the hidden rings */
|
||||
ND("%p: case 2, hidden rings", na);
|
||||
for (i = 0; i < na->num_tx_rings + 1; i++)
|
||||
na->tx_rings[i].ring = na->tx_rings[i].save_ring;
|
||||
for (i = 0; i < na->num_rx_rings + 1; i++)
|
||||
na->rx_rings[i].ring = na->rx_rings[i].save_ring;
|
||||
}
|
||||
return 0;
|
||||
|
||||
del_krings2:
|
||||
netmap_krings_delete(ona);
|
||||
del_rings1:
|
||||
netmap_mem_rings_delete(na);
|
||||
del_krings1:
|
||||
netmap_krings_delete(na);
|
||||
err:
|
||||
return error;
|
||||
}

/* netmap_pipe_reg.
 *
 * There are two cases on registration (onoff==1)
 *
 * 1.a) state is
 *
 *        usr1 --> e1 --> e2
 *
 *      and we are e1. Nothing special to do.
 *
 * 1.b) state is
 *
 *        usr1 --> e1 --> e2 <-- usr2
 *
 *      and we are e2. Drop the ref e1 is holding.
 *
 * There are two additional cases on unregister (onoff==0)
 *
 * 2.a) state is
 *
 *        usr1 --> e1 --> e2
 *
 *      and we are e1. Nothing special to do, e2 will
 *      be cleaned up by the destructor of e1.
 *
 * 2.b) state is
 *
 *        usr1 --> e1     e2 <-- usr2
 *
 *      and we are either e1 or e2. Add a ref from the
 *      other end and hide our rings.
 */
static int
netmap_pipe_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_pipe_adapter *pna =
		(struct netmap_pipe_adapter *)na;
	struct ifnet *ifp = na->ifp;
	ND("%p: onoff %d", na, onoff);
	if (onoff) {
		ifp->if_capenable |= IFCAP_NETMAP;
	} else {
		ifp->if_capenable &= ~IFCAP_NETMAP;
	}
	if (pna->peer_ref) {
		ND("%p: case 1.a or 2.a, nothing to do", na);
		return 0;
	}
	if (onoff) {
		ND("%p: case 1.b, drop peer", na);
		pna->peer->peer_ref = 0;
		netmap_adapter_put(na);
	} else {
		int i;
		ND("%p: case 2.b, grab peer", na);
		netmap_adapter_get(na);
		pna->peer->peer_ref = 1;
		/* hide our rings from netmap_mem_rings_delete */
		for (i = 0; i < na->num_tx_rings + 1; i++) {
			na->tx_rings[i].ring = NULL;
		}
		for (i = 0; i < na->num_rx_rings + 1; i++) {
			na->rx_rings[i].ring = NULL;
		}
	}
	return 0;
}

/* netmap_pipe_krings_delete.
 *
 * There are two cases:
 *
 * 1) state is
 *
 *        usr1 --> e1 --> e2
 *
 *    and we are e1 (e2 is not registered, so krings_delete cannot be
 *    called on it);
 *
 * 2) state is
 *
 *        usr1 --> e1     e2 <-- usr2
 *
 *    and we are either e1 or e2.
 *
 * In the former case we have to also delete the krings of e2;
 * in the latter case we do nothing (note that our krings
 * have already been hidden in the unregister callback).
 */
static void
netmap_pipe_krings_delete(struct netmap_adapter *na)
{
	struct netmap_pipe_adapter *pna =
		(struct netmap_pipe_adapter *)na;
	struct netmap_adapter *ona; /* na of the other end */
	int i;

	if (!pna->peer_ref) {
		ND("%p: case 2, kept alive by peer", na);
		return;
	}
	/* case 1) above */
	ND("%p: case 1, deleting everything", na);
	netmap_krings_delete(na); /* also zeroes tx_rings etc. */
	/* restore the ring to be deleted on the peer */
	ona = &pna->peer->up;
	if (ona->tx_rings == NULL) {
		/* already deleted, we must be on a
		 * cleanup-after-error path */
		return;
	}
	for (i = 0; i < ona->num_tx_rings + 1; i++)
		ona->tx_rings[i].ring = ona->tx_rings[i].save_ring;
	for (i = 0; i < ona->num_rx_rings + 1; i++)
		ona->rx_rings[i].ring = ona->rx_rings[i].save_ring;
	netmap_mem_rings_delete(ona);
	netmap_krings_delete(ona);
}


static void
netmap_pipe_dtor(struct netmap_adapter *na)
{
	struct netmap_pipe_adapter *pna =
		(struct netmap_pipe_adapter *)na;
	ND("%p", na);
	if (pna->peer_ref) {
		ND("%p: clean up peer", na);
		pna->peer_ref = 0;
		netmap_adapter_put(&pna->peer->up);
	}
	if (pna->role == NR_REG_PIPE_MASTER)
		netmap_pipe_remove(pna->parent, pna);
	netmap_adapter_put(pna->parent);
	free(na->ifp, M_DEVBUF);
	na->ifp = NULL;
	pna->parent = NULL;
}

int
netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
	struct nmreq pnmr;
	struct netmap_adapter *pna; /* parent adapter */
	struct netmap_pipe_adapter *mna, *sna, *req;
	struct ifnet *ifp, *ifp2;
	u_int pipe_id;
	int role = nmr->nr_flags & NR_REG_MASK;
	int error;

	ND("flags %x", nmr->nr_flags);

	if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) {
		ND("not a pipe");
		return 0;
	}
	role = nmr->nr_flags & NR_REG_MASK;

	/* first, try to find the parent adapter */
	bzero(&pnmr, sizeof(pnmr));
	memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
	/* pass to parent the requested number of pipes */
	pnmr.nr_arg1 = nmr->nr_arg1;
	error = netmap_get_na(&pnmr, &pna, create);
	if (error) {
		ND("parent lookup failed: %d", error);
		return error;
	}
	ND("found parent: %s", NM_IFPNAME(pna->ifp));

	if (NETMAP_OWNED_BY_KERN(pna)) {
		ND("parent busy");
		error = EBUSY;
		goto put_out;
	}

	/* next, lookup the pipe id in the parent list */
	req = NULL;
	pipe_id = nmr->nr_ringid & NETMAP_RING_MASK;
	mna = netmap_pipe_find(pna, pipe_id);
	if (mna) {
		if (mna->role == role) {
			ND("found %d directly at %d", pipe_id, mna->parent_slot);
			req = mna;
		} else {
			ND("found %d indirectly at %d", pipe_id, mna->parent_slot);
			req = mna->peer;
		}
		/* the pipe we have found already holds a ref to the parent,
		 * so we need to drop the one we got from netmap_get_na()
		 */
		netmap_adapter_put(pna);
		goto found;
	}
	ND("pipe %d not found, create %d", pipe_id, create);
	if (!create) {
		error = ENODEV;
		goto put_out;
	}
	/* we create both master and slave.
	 * The endpoint we were asked for holds a reference to
	 * the other one.
	 */
	ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!ifp) {
		error = ENOMEM;
		goto put_out;
	}
	strcpy(ifp->if_xname, NM_IFPNAME(pna->ifp));

	mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (mna == NULL) {
		error = ENOMEM;
		goto free_ifp;
	}
	mna->up.ifp = ifp;

	mna->id = pipe_id;
	mna->role = NR_REG_PIPE_MASTER;
	mna->parent = pna;

	mna->up.nm_txsync = netmap_pipe_txsync;
	mna->up.nm_rxsync = netmap_pipe_rxsync;
	mna->up.nm_register = netmap_pipe_reg;
	mna->up.nm_dtor = netmap_pipe_dtor;
	mna->up.nm_krings_create = netmap_pipe_krings_create;
	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
	mna->up.nm_mem = pna->nm_mem;
	mna->up.na_lut = pna->na_lut;
	mna->up.na_lut_objtotal = pna->na_lut_objtotal;

	mna->up.num_tx_rings = 1;
	mna->up.num_rx_rings = 1;
	mna->up.num_tx_desc = nmr->nr_tx_slots;
	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
			1, NM_PIPE_MAXSLOTS, NULL);
	mna->up.num_rx_desc = nmr->nr_rx_slots;
	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
			1, NM_PIPE_MAXSLOTS, NULL);
	error = netmap_attach_common(&mna->up);
	if (error)
		goto free_mna;
	/* register the master with the parent */
	error = netmap_pipe_add(pna, mna);
	if (error)
		goto free_mna;

	/* create the slave */
	ifp2 = malloc(sizeof(*ifp2), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!ifp2) {
		error = ENOMEM;
		goto free_mna;
	}
	strcpy(ifp2->if_xname, NM_IFPNAME(pna->ifp));

	sna = malloc(sizeof(*sna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sna == NULL) {
		error = ENOMEM;
		goto free_ifp2;
	}
	/* most fields are the same, copy from master and then fix */
	*sna = *mna;
	sna->up.ifp = ifp2;
	sna->role = NR_REG_PIPE_SLAVE;
	error = netmap_attach_common(&sna->up);
	if (error)
		goto free_sna;

	/* join the two endpoints */
	mna->peer = sna;
	sna->peer = mna;

	/* we already have a reference to the parent, but we
	 * need another one for the other endpoint we created
	 */
	netmap_adapter_get(pna);

	if (role == NR_REG_PIPE_MASTER) {
		req = mna;
		mna->peer_ref = 1;
		netmap_adapter_get(&sna->up);
	} else {
		req = sna;
		sna->peer_ref = 1;
		netmap_adapter_get(&mna->up);
	}
	ND("created master %p and slave %p", mna, sna);
found:

	ND("pipe %d %s at %p", pipe_id,
		(req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req);
	*na = &req->up;
	netmap_adapter_get(*na);

	/* write the configuration back */
	nmr->nr_tx_rings = req->up.num_tx_rings;
	nmr->nr_rx_rings = req->up.num_rx_rings;
	nmr->nr_tx_slots = req->up.num_tx_desc;
	nmr->nr_rx_slots = req->up.num_rx_desc;

	/* keep the reference to the parent.
	 * It will be released by the req destructor
	 */

	return 0;

free_sna:
	free(sna, M_DEVBUF);
free_ifp2:
	free(ifp2, M_DEVBUF);
free_mna:
	free(mna, M_DEVBUF);
free_ifp:
	free(ifp, M_DEVBUF);
put_out:
	netmap_adapter_put(pna);
	return error;
}


#endif /* WITH_PIPES */
2103
netmap/sys/dev/netmap/netmap_vale.c
Normal file
File diff suppressed because it is too large
20
netmap/sys/modules/netmap/Makefile
Normal file
@@ -0,0 +1,20 @@
# $FreeBSD$
#
# Compile netmap as a module, useful if you want a netmap bridge
# or loadable drivers.

.PATH: ${.CURDIR}/../../dev/netmap
.PATH.h: ${.CURDIR}/../../net
CFLAGS += -I${.CURDIR}/../../
KMOD = netmap
SRCS = device_if.h bus_if.h opt_netmap.h
SRCS += netmap.c netmap.h netmap_kern.h
SRCS += netmap_mem2.c netmap_mem2.h
SRCS += netmap_generic.c
SRCS += netmap_mbq.c netmap_mbq.h
SRCS += netmap_vale.c
SRCS += netmap_freebsd.c
SRCS += netmap_offloadings.c
SRCS += netmap_pipe.c

.include <bsd.kmod.mk>
550
netmap/sys/net/netmap.h
Normal file
@@ -0,0 +1,550 @@
/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $
 *
 * Definitions of constants and the structures used by the netmap
 * framework, for the part visible to both kernel and userspace.
 * Detailed info on netmap is available with "man netmap" or at
 *
 *	http://info.iet.unipi.it/~luigi/netmap/
 *
 * This API is also used to communicate with the VALE software switch
 */

#ifndef _NET_NETMAP_H_
#define _NET_NETMAP_H_

#define NETMAP_API	11	/* current API version */

#define NETMAP_MIN_API	11	/* min and max versions accepted */
#define NETMAP_MAX_API	15
/*
 * Some fields should be cache-aligned to reduce contention.
 * The alignment is architecture and OS dependent, but rather than
 * digging into OS headers to find the exact value we use an estimate
 * that should cover most architectures.
 */
#define NM_CACHE_ALIGN	128

/*
 * --- Netmap data structures ---
 *
 * The userspace data structures used by netmap are shown below.
 * They are allocated by the kernel and mmap()ed by userspace threads.
 * Pointers are implemented as memory offsets or indexes,
 * so that they can be easily dereferenced in kernel and userspace.

   KERNEL (opaque, obviously)

   ====================================================================
                                          |
   USERSPACE                              |      struct netmap_ring
                                          +---->+---------------+
                                              / | head,cur,tail |
   struct netmap_if (nifp, 1 per fd)         /  | buf_ofs       |
    +---------------+                       /   | other fields  |
    | ni_tx_rings   |                      /    +===============+
    | ni_rx_rings   |                     /     | buf_idx, len  | slot[0]
    |               |                    /      | flags, ptr    |
    |               |                   /       +---------------+
    +===============+                  /        | buf_idx, len  | slot[1]
    | txring_ofs[0] | (rel.to nifp)--'          | flags, ptr    |
    | txring_ofs[1] |                           +---------------+
     (tx+1 entries)                             (num_slots entries)
    | txring_ofs[t] |                           | buf_idx, len  | slot[n-1]
    +---------------+                           | flags, ptr    |
    | rxring_ofs[0] |                           +---------------+
    | rxring_ofs[1] |
     (rx+1 entries)
    | rxring_ofs[r] |
    +---------------+

 * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
 * a file descriptor, the mmap()ed region contains a (logically readonly)
 * struct netmap_if pointing to struct netmap_ring's.
 *
 * There is one netmap_ring per physical NIC ring, plus one tx/rx ring
 * pair attached to the host stack (this pair is unused for non-NIC ports).
 *
 * All physical/host stack ports share the same memory region,
 * so that zero-copy can be implemented between them.
 * VALE switch ports instead have separate memory regions.
 *
 * The netmap_ring is the userspace-visible replica of the NIC ring.
 * Each slot has the index of a buffer (MTU-sized and residing in the
 * mmapped region), its length and some flags. An extra 64-bit pointer
 * is provided for user-supplied buffers in the tx path.
 *
 * In user space, the buffer address is computed as
 *	(char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
 *
 * Added in NETMAP_API 11:
 *
 * + NIOCREGIF can request the allocation of extra spare buffers from
 *   the same memory pool. The desired number of buffers must be in
 *   nr_arg3. The ioctl may return fewer buffers, depending on memory
 *   availability. nr_arg3 will return the actual value, and, once
 *   mapped, nifp->ni_bufs_head will be the index of the first buffer.
 *
 *   The buffers are linked to each other using the first uint32_t
 *   as the index. On close, ni_bufs_head must point to the list of
 *   buffers to be released.
 *
 * + NIOCREGIF can request space for extra rings (and buffers)
 *   allocated in the same memory space. The number of extra rings
 *   is in nr_arg1, and is advisory. This is a no-op on NICs where
 *   the size of the memory space is fixed.
 *
 * + NIOCREGIF can attach to PIPE rings sharing the same memory
 *   space with a parent device. The ifname indicates the parent device,
 *   which must already exist. Flags in nr_flags indicate if we want to
 *   bind the master or slave side, the index (from nr_ringid)
 *   is just a cookie and does not need to be sequential.
 *
 * + NIOCREGIF can also attach to 'monitor' rings that replicate
 *   the content of specific rings, also from the same memory space.
 *
 * Extra flags in nr_flags support the above functions.
 * Application libraries may use the following naming scheme:
 *	netmap:foo	all NIC ring pairs
 *	netmap:foo^	only host ring pair
 *	netmap:foo+	all NIC ring + host ring pairs
 *	netmap:foo-k	the k-th NIC ring pair
 *	netmap:foo{k	PIPE ring pair k, master side
 *	netmap:foo}k	PIPE ring pair k, slave side
 */

/*
 * struct netmap_slot is a buffer descriptor
 */
struct netmap_slot {
	uint32_t buf_idx;	/* buffer index */
	uint16_t len;		/* length for this slot */
	uint16_t flags;		/* buf changed, etc. */
	uint64_t ptr;		/* pointer for indirect buffers */
};

/*
 * The following flags control how the slot is used
 */

#define	NS_BUF_CHANGED	0x0001	/* buf_idx changed */
	/*
	 * must be set whenever buf_idx is changed (as it might be
	 * necessary to recompute the physical address and mapping)
	 */

#define	NS_REPORT	0x0002	/* ask the hardware to report results */
	/*
	 * Request notification when slot is used by the hardware.
	 * Normally transmit completions are handled lazily and
	 * may be unreported. This flag lets us know when a slot
	 * has been sent (e.g. to terminate the sender).
	 */

#define	NS_FORWARD	0x0004	/* pass packet 'forward' */
	/*
	 * (Only for physical ports, rx rings with NR_FORWARD set).
	 * Slots released to the kernel (i.e. before ring->head) with
	 * this flag set are passed to the peer ring (host/NIC),
	 * thus restoring the host-NIC connection for these slots.
	 * This supports efficient traffic monitoring or firewalling.
	 */

#define	NS_NO_LEARN	0x0008	/* disable bridge learning */
	/*
	 * On a VALE switch, do not 'learn' the source port for
	 * this buffer.
	 */

#define	NS_INDIRECT	0x0010	/* userspace buffer */
	/*
	 * (VALE tx rings only) data is in a userspace buffer,
	 * whose address is in the 'ptr' field in the slot.
	 */

#define	NS_MOREFRAG	0x0020	/* packet has more fragments */
	/*
	 * (VALE ports only)
	 * Set on all but the last slot of a multi-segment packet.
	 * The 'len' field refers to the individual fragment.
	 */

#define NS_PORT_SHIFT	8
#define NS_PORT_MASK	(0xff << NS_PORT_SHIFT)
	/*
	 * The high 8 bits of the flag, if not zero, indicate the
	 * destination port for the VALE switch, overriding
	 * the lookup table.
	 */

#define NS_RFRAGS(_slot)	( ((_slot)->flags >> 8) & 0xff)
	/*
	 * (VALE rx rings only) the high 8 bits
	 * are the number of fragments.
	 */
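
/*
 * A minimal sketch of the zero-copy idiom that NS_BUF_CHANGED enables
 * (example_zcopy_swap is an illustrative helper, not a netmap API).
 * When an rx and a tx ring share a memory region, a forwarder can swap
 * buffer indexes instead of copying payloads; raising NS_BUF_CHANGED on
 * both slots tells the kernel to recompute addresses and mappings.
 */
static inline void
example_zcopy_swap(struct netmap_slot *rxs, struct netmap_slot *txs)
{
	uint32_t tmp = txs->buf_idx;	/* keep the tx buffer for rx reuse */

	txs->buf_idx = rxs->buf_idx;	/* tx slot now points at the rx payload */
	rxs->buf_idx = tmp;
	txs->len = rxs->len;
	txs->flags |= NS_BUF_CHANGED;	/* both buf_idx fields changed */
	rxs->flags |= NS_BUF_CHANGED;
}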


/*
 * struct netmap_ring
 *
 * Netmap representation of a TX or RX ring (also known as "queue").
 * This is a queue implemented as a fixed-size circular array.
 * At the software level the important fields are: head, cur, tail.
 *
 * In TX rings:
 *
 *	head	first slot available for transmission.
 *	cur	wakeup point. select() and poll() will unblock
 *		when 'tail' moves past 'cur'
 *	tail	(readonly) first slot reserved to the kernel
 *
 *	[head .. tail-1] can be used for new packets to send;
 *	'head' and 'cur' must be incremented as slots are filled
 *	with new packets to be sent;
 *	'cur' can be moved further ahead if we need more space
 *	for new transmissions.
 *
 * In RX rings:
 *
 *	head	first valid received packet
 *	cur	wakeup point. select() and poll() will unblock
 *		when 'tail' moves past 'cur'
 *	tail	(readonly) first slot reserved to the kernel
 *
 *	[head .. tail-1] contain received packets;
 *	'head' and 'cur' must be incremented as slots are consumed
 *	and can be returned to the kernel;
 *	'cur' can be moved further ahead if we want to wait for
 *	new packets without returning the previous ones.
 *
 * DATA OWNERSHIP/LOCKING:
 *	The netmap_ring, and all slots and buffers in the range
 *	[head .. tail-1] are owned by the user program;
 *	the kernel only accesses them during a netmap system call
 *	and in the user thread context.
 *
 *	Other slots and buffers are reserved for use by the kernel
 */
struct netmap_ring {
	/*
	 * buf_ofs is meant to be used through macros.
	 * It contains the offset of the buffer region from this
	 * descriptor.
	 */
	const int64_t	buf_ofs;
	const uint32_t	num_slots;	/* number of slots in the ring. */
	const uint32_t	nr_buf_size;
	const uint16_t	ringid;
	const uint16_t	dir;		/* 0: tx, 1: rx */

	uint32_t	head;		/* (u) first user slot */
	uint32_t	cur;		/* (u) wakeup point */
	uint32_t	tail;		/* (k) first kernel slot */

	uint32_t	flags;

	struct timeval	ts;		/* (k) time of last *sync() */

	/* opaque room for a mutex or similar object */
	uint8_t		sem[128] __attribute__((__aligned__(NM_CACHE_ALIGN)));

	/* the slots follow. This struct has variable size */
	struct netmap_slot slot[0];	/* array of slots. */
};
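
/*
 * A minimal usage sketch (illustrative only, not a netmap API) of the
 * TX-side ownership rules above: slots in [head .. tail-1] belong to
 * the program, which fills them, then moves head and cur forward and
 * lets the next txsync (ioctl or poll) hand them to the kernel.
 */
static inline void
example_tx_fill(struct netmap_ring *ring)
{
	uint32_t i = ring->head;
	int b;

	while (i != ring->tail) {	/* slots we own */
		struct netmap_slot *slot = &ring->slot[i];
		char *buf = (char *)ring + ring->buf_ofs +
			(int64_t)slot->buf_idx * ring->nr_buf_size;

		for (b = 0; b < 60; b++)	/* dummy 60-byte frame */
			buf[b] = (char)b;
		slot->len = 60;
		i = (i + 1 == ring->num_slots) ? 0 : i + 1;
	}
	ring->head = ring->cur = i;	/* hand the slots to the kernel */
}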


/*
 * RING FLAGS
 */
#define	NR_TIMESTAMP	0x0002	/* set timestamp on *sync() */
	/*
	 * updates the 'ts' field on each netmap syscall. This saves
	 * a separate gettimeofday(), and is not much worse than
	 * software timestamps generated in the interrupt handler.
	 */

#define	NR_FORWARD	0x0004	/* enable NS_FORWARD for ring */
	/*
	 * Enables the NS_FORWARD slot flag for the ring.
	 */


/*
 * Netmap representation of an interface and its queue(s).
 * This is initialized by the kernel when binding a file
 * descriptor to a port, and should be considered as readonly
 * by user programs. The kernel never uses it.
 *
 * There is one netmap_if for each file descriptor on which we want
 * to select/poll.
 * select/poll operates on one or all pairs depending on the value of
 * nmr_queueid passed on the ioctl.
 */
struct netmap_if {
	char		ni_name[IFNAMSIZ];	/* name of the interface. */
	const uint32_t	ni_version;	/* API version, currently unused */
	const uint32_t	ni_flags;	/* properties */
#define	NI_PRIV_MEM	0x1	/* private memory region */

	/*
	 * The number of packet rings available in netmap mode.
	 * Physical NICs can have different numbers of tx and rx rings.
	 * Physical NICs also have a 'host' ring pair.
	 * Additionally, clients can request additional ring pairs to
	 * be used for internal communication.
	 */
	const uint32_t	ni_tx_rings;	/* number of HW tx rings */
	const uint32_t	ni_rx_rings;	/* number of HW rx rings */

	uint32_t	ni_bufs_head;	/* head index for extra bufs */
	uint32_t	ni_spare1[5];
	/*
	 * The following array contains the offset of each netmap ring
	 * from this structure, in the following order:
	 * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings;
	 * NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings.
	 *
	 * The area is filled up by the kernel on NIOCREGIF,
	 * and then only read by userspace code.
	 */
	const ssize_t	ring_ofs[0];
};
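
/*
 * An illustrative accessor (not a netmap API) spelling out the
 * ring_ofs[] layout documented above: tx offsets come first
 * (ni_tx_rings NIC rings plus the host ring), rx offsets follow.
 * The NETMAP_TXRING()/NETMAP_RXRING() macros in net/netmap_user.h
 * perform exactly this computation.
 */
static inline struct netmap_ring *
example_rxring(struct netmap_if *nifp, uint32_t i)
{
	/* skip the ni_tx_rings NIC tx entries plus the host tx entry */
	return (struct netmap_ring *)(void *)
		((char *)nifp + nifp->ring_ofs[i + nifp->ni_tx_rings + 1]);
}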


#ifndef NIOCREGIF
/*
 * ioctl names and related fields
 *
 * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
 *	whose identity is set in NIOCREGIF through nr_ringid.
 *	These are non blocking and take no argument.
 *
 * NIOCGINFO takes a struct ifreq, the interface name is the input,
 *	the outputs are number of queues and number of descriptors
 *	for each queue (useful to set number of threads etc.).
 *	The info returned is only advisory and may change before
 *	the interface is bound to a file descriptor.
 *
 * NIOCREGIF takes an interface name within a struct nmreq,
 *	and activates netmap mode on the interface (if possible).
 *
 * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we
 * can pass it down to other NIC-related ioctls.
 *
 * The actual argument (struct nmreq) has a number of options to request
 * different functions.
 * The following are used in NIOCREGIF when nr_cmd == 0:
 *
 * nr_name	(in)
 *	The name of the port (em0, valeXXX:YYY, etc.)
 *	limited to IFNAMSIZ for backward compatibility.
 *
 * nr_version	(in/out)
 *	Must match NETMAP_API as used in the kernel, error otherwise.
 *	Always returns the desired value on output.
 *
 * nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out)
 *	On input, non-zero values may be used to reconfigure the port
 *	according to the requested values, but this is not guaranteed.
 *	On output the actual values in use are reported.
 *
 * nr_ringid (in)
 *	Indicates how rings should be bound to the file descriptors.
 *	If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
 *	are used to indicate the ring number, and nr_flags specifies
 *	the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
 *
 *	NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED:
 *	If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control
 *	the binding as follows:
 *	0 (default)			binds all physical rings
 *	NETMAP_HW_RING | ring number	binds a single ring pair
 *	NETMAP_SW_RING			binds only the host tx/rx rings
 *
 *	NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push
 *	packets on tx rings only if POLLOUT is set.
 *	The default is to push any pending packet.
 *
 *	NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release
 *	packets on rx rings also when POLLIN is NOT set.
 *	The default is to touch the rx ring only with POLLIN.
 *	Note that this is the opposite of TX because it
 *	reflects the common usage.
 *
 *	NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead.
 *	NETMAP_PRIV_MEM is set on return for ports that do not use
 *	the global memory allocator.
 *	This information is not significant and applications
 *	should look at the region id in nr_arg2
 *
 * nr_flags	is the recommended mode to indicate which rings should
 *	be bound to a file descriptor. Values are NR_REG_*
 *
 * nr_arg1 (in)	The number of extra rings to be reserved.
 *	Especially when allocating a VALE port the system only
 *	allocates the amount of memory needed for the port.
 *	If more shared memory rings are desired (e.g. for pipes),
 *	the first invocation for the same basename/allocator
 *	should specify a suitable number. Memory cannot be
 *	extended after the first allocation without closing
 *	all ports on the same region.
 *
 * nr_arg2 (in/out) The identity of the memory region used.
 *	On input, 0 means the system decides autonomously,
 *	other values may try to select a specific region.
 *	On return the actual value is reported.
 *	Region '1' is the global allocator, normally shared
 *	by all interfaces. Other values are private regions.
 *	If two ports use the same region, zero-copy is possible.
 *
 * nr_arg3 (in/out)	number of extra buffers to be allocated.
 *
 *
 *
 * nr_cmd (in)	if non-zero indicates a special command:
 *	NETMAP_BDG_ATTACH	and nr_name = vale*:ifname
 *	attaches the NIC to the switch; nr_ringid specifies
 *	which rings to use. Used by vale-ctl -a ...
 *	    nr_arg1 = NETMAP_BDG_HOST also attaches the host port
 *	    as in vale-ctl -h ...
 *
 *	NETMAP_BDG_DETACH	and nr_name = vale*:ifname
 *	disconnects a previously attached NIC.
 *	Used by vale-ctl -d ...
 *
 *	NETMAP_BDG_LIST
 *	list the configuration of VALE switches.
 *
 *	NETMAP_BDG_VNET_HDR
 *	Set the virtio-net header length used by the client
 *	of a VALE switch port.
 *
 * nr_arg1, nr_arg2, nr_arg3  (in/out)	command specific
 *
 *
 *
 */


/*
 * struct nmreq overlays a struct ifreq (just the name)
 *
 * On input, nr_ringid indicates which rings we are requesting,
 * with the low flags for the specific ring number.
 * selection			FLAGS	RING INDEX
 *
 * all the NIC rings		0x0000	-
 * only HOST ring		0x2000	-
 * single NIC ring		0x4000	ring index
 * all the NIC+HOST rings	0x6000	-
 * one pipe ring, master	0x8000	ring index
 * *** INVALID			0xA000
 * one pipe ring, slave		0xC000	ring index
 * *** INVALID			0xE000
 *
 */
struct nmreq {
	char		nr_name[IFNAMSIZ];
	uint32_t	nr_version;	/* API version */
	uint32_t	nr_offset;	/* nifp offset in the shared region */
	uint32_t	nr_memsize;	/* size of the shared region */
	uint32_t	nr_tx_slots;	/* slots in tx rings */
	uint32_t	nr_rx_slots;	/* slots in rx rings */
	uint16_t	nr_tx_rings;	/* number of tx rings */
	uint16_t	nr_rx_rings;	/* number of rx rings */

	uint16_t	nr_ringid;	/* ring(s) we care about */
#define NETMAP_HW_RING		0x4000	/* single NIC ring pair */
#define NETMAP_SW_RING		0x2000	/* only host ring pair */

#define NETMAP_RING_MASK	0x0fff	/* the ring number */

#define NETMAP_NO_TX_POLL	0x1000	/* no automatic txsync on poll */

#define NETMAP_DO_RX_POLL	0x8000	/* DO automatic rxsync on poll */

	uint16_t	nr_cmd;
#define NETMAP_BDG_ATTACH	1	/* attach the NIC */
#define NETMAP_BDG_DETACH	2	/* detach the NIC */
#define NETMAP_BDG_LOOKUP_REG	3	/* register lookup function */
#define NETMAP_BDG_LIST		4	/* get bridge's info */
#define NETMAP_BDG_VNET_HDR	5	/* set the port virtio-net-hdr length */
#define NETMAP_BDG_OFFSET	NETMAP_BDG_VNET_HDR	/* deprecated alias */

	uint16_t	nr_arg1;	/* reserve extra rings in NIOCREGIF */
#define NETMAP_BDG_HOST		1	/* attach the host stack on ATTACH */

	uint16_t	nr_arg2;
	uint32_t	nr_arg3;	/* req. extra buffers in NIOCREGIF */
	uint32_t	nr_flags;
	/* various modes, extends nr_ringid */
	uint32_t	spare2[1];
};

#define NR_REG_MASK		0xf	/* values for nr_flags */
enum {	NR_REG_DEFAULT	= 0,	/* backward compat, should not be used. */
	NR_REG_ALL_NIC	= 1,
	NR_REG_SW	= 2,
	NR_REG_NIC_SW	= 3,
	NR_REG_ONE_NIC	= 4,
	NR_REG_PIPE_MASTER = 5,
	NR_REG_PIPE_SLAVE = 6,
};
/* monitor uses the NR_REG to select the rings to monitor */
#define NR_MONITOR_TX	0x100
#define NR_MONITOR_RX	0x200


/*
 * FreeBSD uses the size value embedded in the _IOWR to determine
 * how much to copy in/out. So we need it to match the actual
 * data structure we pass. We put some spares in the structure
 * to ease compatibility with other versions
 */
#define NIOCGINFO	_IOWR('i', 145, struct nmreq)	/* return IF info */
#define NIOCREGIF	_IOWR('i', 146, struct nmreq)	/* interface register */
#define NIOCTXSYNC	_IO('i', 148)	/* sync tx queues */
#define NIOCRXSYNC	_IO('i', 149)	/* sync rx queues */
#endif /* !NIOCREGIF */
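
/*
 * A minimal userspace sketch of the NIOCREGIF/mmap sequence described
 * above, guarded out so it is never compiled; "em0" is only a
 * placeholder port name and example_open() is not a netmap API.
 * NETMAP_IF() comes from net/netmap_user.h.
 */
#ifdef NETMAP_EXAMPLE_SKETCH
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static struct netmap_if *
example_open(void)
{
	struct nmreq req;
	void *mem;
	int fd = open("/dev/netmap", O_RDWR);

	if (fd < 0)
		return NULL;
	memset(&req, 0, sizeof(req));
	strncpy(req.nr_name, "em0", sizeof(req.nr_name) - 1);
	req.nr_version = NETMAP_API;
	req.nr_flags = NR_REG_ALL_NIC;	/* bind all NIC ring pairs */
	if (ioctl(fd, NIOCREGIF, &req) < 0)
		return NULL;		/* fd leaks in this sketch */
	mem = mmap(0, req.nr_memsize, PROT_READ | PROT_WRITE,
		MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED)
		return NULL;
	return NETMAP_IF(mem, req.nr_offset);
}
#endif /* NETMAP_EXAMPLE_SKETCH */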


/*
 * Helper functions for kernel and userspace
 */

/*
 * check if space is available in the ring.
 */
static inline int
nm_ring_empty(struct netmap_ring *ring)
{
	return (ring->cur == ring->tail);
}

#endif /* _NET_NETMAP_H_ */
677
netmap/sys/net/netmap_user.h
Normal file
@@ -0,0 +1,677 @@
/*
 * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * Functions and macros to manipulate netmap structures and packets
 * in userspace. See netmap(4) for more information.
 *
 * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
 *		(so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *		we can access ring->head, ring->cur, ring->tail
 *
 *	ring->slot[i] gives us the i-th slot (we can access
 *		directly len, flags, buf_idx)
 *
 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
 *		the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, include #define NETMAP_WITH_LIBS
 * in the source file that invokes these functions.
 */

#ifndef _NET_NETMAP_USER_H_
#define _NET_NETMAP_USER_H_

#include <stdint.h>
#include <sys/socket.h>	/* apple needs sockaddr */
#include <net/if.h>	/* IFNAMSIZ */

#ifndef likely
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#endif /* likely and unlikely */

#include <net/netmap.h>

/* helper macro */
#define _NETMAP_OFFSET(type, ptr, offset) \
	((type)(void *)((char *)(ptr) + (offset)))

#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)

#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index] )

#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )

#define NETMAP_BUF(ring, index) \
	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))

#define NETMAP_BUF_IDX(ring, buf) \
	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
		(ring)->nr_buf_size )


static inline uint32_t
nm_ring_next(struct netmap_ring *r, uint32_t i)
{
	return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
}


/*
 * Return 1 if we have pending transmissions in the tx ring.
 * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
 */
static inline int
nm_tx_pending(struct netmap_ring *r)
{
	return nm_ring_next(r, r->tail) != r->head;
}


static inline uint32_t
nm_ring_space(struct netmap_ring *ring)
{
	int ret = ring->tail - ring->cur;
	if (ret < 0)
		ret += ring->num_slots;
	return ret;
}
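
/*
 * An illustrative transmit burst (example_tx_burst is not a netmap API)
 * combining the helpers above: nm_ring_space() bounds the batch,
 * NETMAP_BUF() locates each buffer, nm_ring_next() advances head and
 * cur. The caller still has to issue NIOCTXSYNC or poll() to push the
 * frames out.
 */
static inline uint32_t
example_tx_burst(struct netmap_ring *ring, const char *frame, uint16_t len)
{
	uint32_t sent, avail = nm_ring_space(ring);
	uint16_t b;

	for (sent = 0; sent < avail; sent++) {
		struct netmap_slot *slot = &ring->slot[ring->cur];
		char *dst = NETMAP_BUF(ring, slot->buf_idx);

		for (b = 0; b < len; b++)	/* copy the sample frame */
			dst[b] = frame[b];
		slot->len = len;
		ring->head = ring->cur = nm_ring_next(ring, ring->cur);
	}
	return sent;
}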


#ifdef NETMAP_WITH_LIBS
/*
 * Support for simple I/O libraries.
 * Include other system headers required for compiling this.
 */

#ifndef HAVE_NETMAP_WITH_LIBS
#define HAVE_NETMAP_WITH_LIBS

#include <stdio.h>	/* fprintf, used by the D() macro below */
#include <sys/time.h>
#include <sys/mman.h>
#include <string.h>	/* memset */
#include <sys/ioctl.h>
#include <sys/errno.h>	/* EINVAL */
#include <fcntl.h>	/* O_RDWR */
#include <unistd.h>	/* close() */
#include <signal.h>
#include <stdlib.h>

#ifndef ND /* debug macros */
/* debug support */
#define ND(_fmt, ...) do {} while(0)
#define D(_fmt, ...)						\
	do {							\
		struct timeval t0;				\
		gettimeofday(&t0, NULL);			\
		fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n",	\
		    (int)(t0.tv_sec % 1000), (int)t0.tv_usec,	\
		    __FUNCTION__, __LINE__, ##__VA_ARGS__);	\
	} while (0)

/* Rate limited version of "D", lps indicates how many per second */
#define RD(lps, format, ...)					\
	do {							\
		static int t0, __cnt;				\
		struct timeval __xxts;				\
		gettimeofday(&__xxts, NULL);			\
		if (t0 != __xxts.tv_sec) {			\
			t0 = __xxts.tv_sec;			\
			__cnt = 0;				\
		}						\
		if (__cnt++ < lps) {				\
			D(format, ##__VA_ARGS__);		\
		}						\
	} while (0)
#endif

struct nm_pkthdr {	/* same as pcap_pkthdr */
	struct timeval	ts;
	uint32_t	caplen;
	uint32_t	len;
};

struct nm_stat {	/* same as pcap_stat */
	u_int	ps_recv;
	u_int	ps_drop;
	u_int	ps_ifdrop;
#ifdef WIN32
	u_int	bs_capt;
#endif /* WIN32 */
};

#define NM_ERRBUF_SIZE	512

struct nm_desc {
	struct nm_desc *self;	/* point to self if netmap. */
	int fd;
	void *mem;
	int memsize;
	int done_mmap;	/* set if mem is the result of mmap */
	struct netmap_if * const nifp;
	uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
	uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
	struct nmreq req;	/* also contains the nr_name = ifname */
	struct nm_pkthdr hdr;

	/*
	 * The memory contains netmap_if, rings and then buffers.
	 * Given a pointer (e.g. to nm_inject) we can compare with
	 * mem/buf_start/buf_end to tell if it is a buffer or
	 * some other descriptor in our region.
	 * We also store a pointer to some ring as it helps in the
	 * translation from buffer indexes to addresses.
	 */
	struct netmap_ring * const some_ring;
	void * const buf_start;
	void * const buf_end;
	/* parameters from pcap_open_live */
	int snaplen;
	int promisc;
	int to_ms;
	char *errbuf;

	/* save flags so we can restore them on close */
	uint32_t if_flags;
	uint32_t if_reqcap;
	uint32_t if_curcap;

	struct nm_stat st;
	char msg[NM_ERRBUF_SIZE];
};

/*
 * when the descriptor is open correctly, d->self == d
 * Eventually we should also use some magic number.
 */
#define P2NMD(p)		((struct nm_desc *)(p))
#define IS_NETMAP_DESC(d)	((d) && P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d)		(P2NMD(d)->fd)


/*
 * this is a slightly optimized copy routine which rounds
 * to multiple of 64 bytes and is often faster than dealing
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
nm_pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = (const uint64_t *)_src;
	uint64_t *dst = (uint64_t *)_dst;

	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	for (; likely(l > 0); l -= 64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}


/*
 * The callback, invoked on each received packet. Same as libpcap
 */
typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);

/*
 *--- the pcap-like API ---
 *
 * nm_open() opens a file descriptor, binds to a port and maps memory.
 *
 * ifname	(netmap:foo or vale:foo) is the port name
 *		a suffix can indicate the following:
 *		^	bind the host (sw) ring pair
 *		*	bind host and NIC ring pairs (transparent)
 *		-NN	bind individual NIC ring pair
 *		{NN	bind master side of pipe NN
 *		}NN	bind slave side of pipe NN
 *
 * req		provides the initial values of nmreq before parsing ifname.
 *		Remember that the ifname parsing will override the ring
 *		number in nm_ringid, and part of nm_flags;
 * flags	special functions, normally 0
 *		indicates which fields of *arg are significant
 * arg		special functions, normally NULL
 *		if passed a netmap_desc with mem != NULL,
 *		use that memory instead of mmap.
 */

static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req,
	uint64_t flags, const struct nm_desc *arg);

/*
 * nm_open can import some fields from the parent descriptor.
 * These flags control which ones.
 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL,
 * which set the initial value for these flags.
 * Note that the 16 low bits of the flags are reserved for data
 * that may go into the nmreq.
 */
enum {
	NM_OPEN_NO_MMAP =	0x040000, /* reuse mmap from parent */
	NM_OPEN_IFNAME =	0x080000, /* nr_name, nr_ringid, nr_flags */
	NM_OPEN_ARG1 =		0x100000,
	NM_OPEN_ARG2 =		0x200000,
	NM_OPEN_ARG3 =		0x400000,
	NM_OPEN_RING_CFG =	0x800000, /* tx|rx rings|slots */
};


/*
 * nm_close() closes and restores the port to its previous state
 */

static int nm_close(struct nm_desc *);

/*
 * nm_inject() is the same as pcap_inject()
 * nm_dispatch() is the same as pcap_dispatch()
 * nm_nextpkt() is the same as pcap_next()
 */

static int nm_inject(struct nm_desc *, const void *, size_t);
static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);
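
/*
 * A minimal capture sketch using the calls above, guarded out so it is
 * never compiled; "netmap:em0" is a placeholder port name and the
 * example_* functions are illustrative, not part of the API. The
 * callback counts packets; poll() blocks until the rx rings have
 * something to read.
 */
#ifdef NETMAP_EXAMPLE_SKETCH
#include <poll.h>

static void
example_cb(u_char *arg, const struct nm_pkthdr *h, const u_char *buf)
{
	(void)h;	/* h->len bytes of payload are available at buf */
	(void)buf;
	(*(unsigned long *)arg)++;
}

static void
example_capture(void)
{
	unsigned long count = 0;
	struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);
	struct pollfd pfd;

	if (d == NULL)
		return;
	pfd.fd = NETMAP_FD(d);
	pfd.events = POLLIN;
	while (count < 1000) {
		if (poll(&pfd, 1, 1000) > 0)
			nm_dispatch(d, 0, example_cb, (u_char *)&count);
	}
	nm_close(d);
}
#endif /* NETMAP_EXAMPLE_SKETCH */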


/*
 * Try to open, return descriptor if successful, NULL otherwise.
 * An invalid netmap name will return errno = 0;
 * You can pass a pointer to a pre-filled nm_desc to add special
 * parameters. Flags is used as follows
 * NM_OPEN_NO_MMAP	use the memory from arg, only
 *			if the nr_arg2 (memory block) matches.
 * NM_OPEN_ARG1		use req.nr_arg1 from arg
 * NM_OPEN_ARG2		use req.nr_arg2 from arg
 * NM_OPEN_RING_CFG	use ring config from arg
 */
static struct nm_desc *
nm_open(const char *ifname, const struct nmreq *req,
	uint64_t new_flags, const struct nm_desc *arg)
{
	struct nm_desc *d = NULL;
	const struct nm_desc *parent = arg;
	u_int namelen;
	uint32_t nr_ringid = 0, nr_flags;
	const char *port = NULL;
	const char *errmsg = NULL;

	if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
		errno = 0; /* name not recognised, not an error */
		return NULL;
	}
	if (ifname[0] == 'n')
		ifname += 7;
	/* scan for a separator */
	for (port = ifname; *port && !index("-*^{}", *port); port++)
		;
	namelen = port - ifname;
	if (namelen >= sizeof(d->req.nr_name)) {
		errmsg = "name too long";
		goto fail;
	}
	switch (*port) {
	default:  /* '\0', no suffix */
		nr_flags = NR_REG_ALL_NIC;
		break;
	case '-': /* one NIC */
		nr_flags = NR_REG_ONE_NIC;
		nr_ringid = atoi(port + 1);
		break;
	case '*': /* NIC and SW, ignore port */
		nr_flags = NR_REG_NIC_SW;
		if (port[1]) {
			errmsg = "invalid port for nic+sw";
			goto fail;
		}
		break;
	case '^': /* only sw ring */
		nr_flags = NR_REG_SW;
		if (port[1]) {
			errmsg = "invalid port for sw ring";
			goto fail;
		}
		break;
	case '{':
		nr_flags = NR_REG_PIPE_MASTER;
		nr_ringid = atoi(port + 1);
		break;
	case '}':
		nr_flags = NR_REG_PIPE_SLAVE;
		nr_ringid = atoi(port + 1);
		break;
	}

	if (nr_ringid >= NETMAP_RING_MASK) {
		errmsg = "invalid ringid";
		goto fail;
	}
	/* add the *XPOLL flags */
	nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);

	d = (struct nm_desc *)calloc(1, sizeof(*d));
	if (d == NULL) {
		errmsg = "nm_desc alloc failure";
		errno = ENOMEM;
		return NULL;
	}
	d->self = d;	/* set this early so nm_close() works */
	d->fd = open("/dev/netmap", O_RDWR);
	if (d->fd < 0) {
		errmsg = "cannot open /dev/netmap";
		goto fail;
	}

	if (req)
		d->req = *req;
	d->req.nr_version = NETMAP_API;
	d->req.nr_ringid &= ~NETMAP_RING_MASK;

	/* these fields are overridden by ifname and flags processing */
	d->req.nr_ringid |= nr_ringid;
	d->req.nr_flags = nr_flags;
	memcpy(d->req.nr_name, ifname, namelen);
	d->req.nr_name[namelen] = '\0';
	/* optionally import info from parent */
	if (IS_NETMAP_DESC(parent) && new_flags) {
		if (new_flags & NM_OPEN_ARG1)
			D("overriding ARG1 %d", parent->req.nr_arg1);
		d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ?
			parent->req.nr_arg1 : 4;
		if (new_flags & NM_OPEN_ARG2)
			D("overriding ARG2 %d", parent->req.nr_arg2);
		d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
			parent->req.nr_arg2 : 0;
		if (new_flags & NM_OPEN_ARG3)
			D("overriding ARG3 %d", parent->req.nr_arg3);
		d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
			parent->req.nr_arg3 : 0;
		if (new_flags & NM_OPEN_RING_CFG) {
			D("overriding RING_CFG");
			d->req.nr_tx_slots = parent->req.nr_tx_slots;
			d->req.nr_rx_slots = parent->req.nr_rx_slots;
			d->req.nr_tx_rings = parent->req.nr_tx_rings;
			d->req.nr_rx_rings = parent->req.nr_rx_rings;
		}
		if (new_flags & NM_OPEN_IFNAME) {
			D("overriding ifname %s ringid 0x%x flags 0x%x",
				parent->req.nr_name, parent->req.nr_ringid,
				parent->req.nr_flags);
			memcpy(d->req.nr_name, parent->req.nr_name,
				sizeof(d->req.nr_name));
			d->req.nr_ringid = parent->req.nr_ringid;
			d->req.nr_flags = parent->req.nr_flags;
		}
	}
	if (ioctl(d->fd, NIOCREGIF, &d->req)) {
		errmsg = "NIOCREGIF failed";
		goto fail;
	}

	if (IS_NETMAP_DESC(parent) && parent->mem &&
	    parent->req.nr_arg2 == d->req.nr_arg2) {
		/* do not mmap, inherit from parent */
		d->memsize = parent->memsize;
		d->mem = parent->mem;
	} else {
		d->memsize = d->req.nr_memsize;
		d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
				d->fd, 0);
		if (d->mem == MAP_FAILED) {	/* mmap() signals failure this way */
			errmsg = "mmap failed";
			goto fail;
		}
		d->done_mmap = 1;
	}
	{
		struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
		struct netmap_ring *r = NETMAP_RXRING(nifp, 0); /* first rx ring as reference */

		*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
		*(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
		*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
		*(void **)(uintptr_t)&d->buf_end =
			(char *)d->mem + d->memsize;
	}

	if (nr_flags == NR_REG_SW) { /* host stack */
		d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
		d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
	} else if (nr_flags == NR_REG_ALL_NIC) { /* only nic */
		d->first_tx_ring = 0;
		d->first_rx_ring = 0;
		d->last_tx_ring = d->req.nr_tx_rings - 1;
		d->last_rx_ring = d->req.nr_rx_rings - 1;
	} else if (nr_flags == NR_REG_NIC_SW) {
		d->first_tx_ring = 0;
		d->first_rx_ring = 0;
		d->last_tx_ring = d->req.nr_tx_rings;
		d->last_rx_ring = d->req.nr_rx_rings;
	} else if (nr_flags == NR_REG_ONE_NIC) {
		/* XXX check validity */
		d->first_tx_ring = d->last_tx_ring =
		d->first_rx_ring = d->last_rx_ring = nr_ringid;
	} else { /* pipes */
		d->first_tx_ring = d->last_tx_ring = 0;
		d->first_rx_ring = d->last_rx_ring = 0;
	}

#ifdef DEBUG_NETMAP_USER
	{ /* debugging code */
		int i;

		D("%s tx %d .. %d %d rx %d .. %d %d", ifname,
			d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings,
			d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings);
		for (i = 0; i <= d->req.nr_tx_rings; i++) {
			struct netmap_ring *r = NETMAP_TXRING(d->nifp, i);
			D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
		}
		for (i = 0; i <= d->req.nr_rx_rings; i++) {
			struct netmap_ring *r = NETMAP_RXRING(d->nifp, i);
			D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail);
		}
	}
#endif /* debugging */

	d->cur_tx_ring = d->first_tx_ring;
	d->cur_rx_ring = d->first_rx_ring;
	return d;

fail:
	nm_close(d);
	if (errmsg)
		D("%s %s", errmsg, ifname);
	errno = EINVAL;
	return NULL;
}


static int
nm_close(struct nm_desc *d)
{
	/*
	 * ugly trick to avoid unused warnings
	 */
	static void *__xxzt[] __attribute__ ((unused)) =
		{ (void *)nm_open, (void *)nm_inject,
		  (void *)nm_dispatch, (void *)nm_nextpkt } ;

	if (d == NULL || d->self != d)
		return EINVAL;
	if (d->done_mmap && d->mem)
		munmap(d->mem, d->memsize);
	if (d->fd != -1)
		close(d->fd);
	bzero(d, sizeof(*d));
	free(d);
	return 0;
}


/*
 * Same prototype as pcap_inject(), only need to cast.
 */
static int
nm_inject(struct nm_desc *d, const void *buf, size_t size)
{
	u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;

	for (c = 0; c < n ; c++) {
		/* compute current ring to use */
		struct netmap_ring *ring;
		uint32_t i, idx;
		uint32_t ri = d->cur_tx_ring + c;

		if (ri > d->last_tx_ring)
			ri = d->first_tx_ring;
		ring = NETMAP_TXRING(d->nifp, ri);
		if (nm_ring_empty(ring)) {
			continue;
		}
		i = ring->cur;
		idx = ring->slot[i].buf_idx;
		ring->slot[i].len = size;
		nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
		d->cur_tx_ring = ri;
		ring->head = ring->cur = nm_ring_next(ring, i);
		return size;
	}
	return 0; /* fail */
}


/*
 * Same prototype as pcap_dispatch(), only need to cast.
 */
static int
nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
{
	int n = d->last_rx_ring - d->first_rx_ring + 1;
	int c, got = 0, ri = d->cur_rx_ring;

	if (cnt == 0)
		cnt = -1;
	/* cnt == -1 means infinite, but rings have a finite amount
	 * of buffers and the int is large enough that we never wrap,
	 * so we can omit checking for -1
	 */
	for (c = 0; c < n && cnt != got; c++) {
		/* compute current ring to use */
		struct netmap_ring *ring;

		ri = d->cur_rx_ring + c;
		if (ri > d->last_rx_ring)
			ri = d->first_rx_ring;
		ring = NETMAP_RXRING(d->nifp, ri);
		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
			u_int i = ring->cur;
			u_int idx = ring->slot[i].buf_idx;
			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);

			// __builtin_prefetch(buf);
			d->hdr.len = d->hdr.caplen = ring->slot[i].len;
			d->hdr.ts = ring->ts;
			cb(arg, &d->hdr, buf);
			ring->head = ring->cur = nm_ring_next(ring, i);
		}
	}
	d->cur_rx_ring = ri;
	return got;
}

static u_char *
nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr)
{
	int ri = d->cur_rx_ring;

	do {
		/* compute current ring to use */
		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
		if (!nm_ring_empty(ring)) {
			u_int i = ring->cur;
			u_int idx = ring->slot[i].buf_idx;
			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);

			// __builtin_prefetch(buf);
			hdr->ts = ring->ts;
			hdr->len = hdr->caplen = ring->slot[i].len;
			ring->cur = nm_ring_next(ring, i);
			/* we could postpone advancing head if we want
			 * to hold the buffer. This can be supported in
			 * the future.
			 */
			ring->head = ring->cur;
			d->cur_rx_ring = ri;
			return buf;
		}
		ri++;
		if (ri > d->last_rx_ring)
			ri = d->first_rx_ring;
	} while (ri != d->cur_rx_ring);
	return NULL; /* nothing found */
}

#endif /* !HAVE_NETMAP_WITH_LIBS */

#endif /* NETMAP_WITH_LIBS */

#endif /* _NET_NETMAP_USER_H_ */