1
0
mirror of git://projects.qi-hardware.com/openwrt-xburst.git synced 2024-10-01 11:04:10 +03:00

ar71xx: improve rx performance of the ethernet driver by using build_skb to deliver a cache-hot skb to the network stack

git-svn-id: svn://svn.openwrt.org/openwrt/trunk@31934 3c298f89-4303-0410-b956-a3cf2f4a3e73
This commit is contained in:
nbd 2012-05-28 02:55:59 +00:00
parent 29aa2abbf6
commit 0f299fab07
2 changed files with 49 additions and 41 deletions

View File

@ -53,6 +53,7 @@
#define AG71XX_TX_MTU_LEN 1540 #define AG71XX_TX_MTU_LEN 1540
#define AG71XX_RX_PKT_SIZE \ #define AG71XX_RX_PKT_SIZE \
(ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN) (ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
#define AG71XX_RX_BUF_SIZE (AG71XX_RX_PKT_SIZE + NET_SKB_PAD + NET_IP_ALIGN)
#define AG71XX_TX_RING_SIZE_DEFAULT 64 #define AG71XX_TX_RING_SIZE_DEFAULT 64
#define AG71XX_RX_RING_SIZE_DEFAULT 128 #define AG71XX_RX_RING_SIZE_DEFAULT 128
@ -85,7 +86,10 @@ struct ag71xx_desc {
} __attribute__((aligned(4))); } __attribute__((aligned(4)));
struct ag71xx_buf { struct ag71xx_buf {
struct sk_buff *skb; union {
struct sk_buff *skb;
void *rx_buf;
};
struct ag71xx_desc *desc; struct ag71xx_desc *desc;
dma_addr_t dma_addr; dma_addr_t dma_addr;
unsigned long timestamp; unsigned long timestamp;

View File

@ -189,15 +189,17 @@ static void ag71xx_ring_rx_clean(struct ag71xx *ag)
return; return;
for (i = 0; i < ring->size; i++) for (i = 0; i < ring->size; i++)
if (ring->buf[i].skb) { if (ring->buf[i].rx_buf) {
dma_unmap_single(&ag->dev->dev, ring->buf[i].dma_addr, dma_unmap_single(&ag->dev->dev, ring->buf[i].dma_addr,
AG71XX_RX_PKT_SIZE, DMA_FROM_DEVICE); AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
kfree_skb(ring->buf[i].skb); kfree(ring->buf[i].rx_buf);
} }
} }
struct sk_buff *ag71xx_rx_alloc(struct ag71xx *ag) static int ag71xx_buffer_offset(struct ag71xx *ag)
{ {
int offset = NET_SKB_PAD;
/* /*
* On AR71xx/AR91xx packets must be 4-byte aligned. * On AR71xx/AR91xx packets must be 4-byte aligned.
* *
@ -205,17 +207,35 @@ struct sk_buff *ag71xx_rx_alloc(struct ag71xx *ag)
* so we don't need any extra alignment in that case. * so we don't need any extra alignment in that case.
*/ */
if (!ag71xx_get_pdata(ag)->is_ar724x || ag71xx_has_ar8216(ag)) if (!ag71xx_get_pdata(ag)->is_ar724x || ag71xx_has_ar8216(ag))
return netdev_alloc_skb(ag->dev, AG71XX_RX_PKT_SIZE); return offset;
return netdev_alloc_skb_ip_align(ag->dev, AG71XX_RX_PKT_SIZE); return offset + NET_IP_ALIGN;
} }
static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf,
int offset)
{
void *data;
data = kmalloc(AG71XX_RX_BUF_SIZE +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
GFP_ATOMIC);
if (!data)
return false;
buf->rx_buf = data;
buf->dma_addr = dma_map_single(&ag->dev->dev, data,
AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
buf->desc->data = (u32) buf->dma_addr + offset;
return true;
}
static int ag71xx_ring_rx_init(struct ag71xx *ag) static int ag71xx_ring_rx_init(struct ag71xx *ag)
{ {
struct ag71xx_ring *ring = &ag->rx_ring; struct ag71xx_ring *ring = &ag->rx_ring;
unsigned int i; unsigned int i;
int ret; int ret;
int offset = ag71xx_buffer_offset(ag);
ret = 0; ret = 0;
for (i = 0; i < ring->size; i++) { for (i = 0; i < ring->size; i++) {
@ -228,22 +248,11 @@ static int ag71xx_ring_rx_init(struct ag71xx *ag)
} }
for (i = 0; i < ring->size; i++) { for (i = 0; i < ring->size; i++) {
struct sk_buff *skb; if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], offset)) {
dma_addr_t dma_addr;
skb = ag71xx_rx_alloc(ag);
if (!skb) {
ret = -ENOMEM; ret = -ENOMEM;
break; break;
} }
skb->dev = ag->dev;
dma_addr = dma_map_single(&ag->dev->dev, skb->data,
AG71XX_RX_PKT_SIZE,
DMA_FROM_DEVICE);
ring->buf[i].skb = skb;
ring->buf[i].dma_addr = dma_addr;
ring->buf[i].desc->data = (u32) dma_addr;
ring->buf[i].desc->ctrl = DESC_EMPTY; ring->buf[i].desc->ctrl = DESC_EMPTY;
} }
@ -260,6 +269,7 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag)
{ {
struct ag71xx_ring *ring = &ag->rx_ring; struct ag71xx_ring *ring = &ag->rx_ring;
unsigned int count; unsigned int count;
int offset = ag71xx_buffer_offset(ag);
count = 0; count = 0;
for (; ring->curr - ring->dirty > 0; ring->dirty++) { for (; ring->curr - ring->dirty > 0; ring->dirty++) {
@ -267,24 +277,9 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag)
i = ring->dirty % ring->size; i = ring->dirty % ring->size;
if (ring->buf[i].skb == NULL) { if (!ring->buf[i].rx_buf &&
dma_addr_t dma_addr; !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset))
struct sk_buff *skb; break;
skb = ag71xx_rx_alloc(ag);
if (skb == NULL)
break;
skb->dev = ag->dev;
dma_addr = dma_map_single(&ag->dev->dev, skb->data,
AG71XX_RX_PKT_SIZE,
DMA_FROM_DEVICE);
ring->buf[i].skb = skb;
ring->buf[i].dma_addr = dma_addr;
ring->buf[i].desc->data = (u32) dma_addr;
}
ring->buf[i].desc->ctrl = DESC_EMPTY; ring->buf[i].desc->ctrl = DESC_EMPTY;
count++; count++;
@ -863,6 +858,7 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
{ {
struct net_device *dev = ag->dev; struct net_device *dev = ag->dev;
struct ag71xx_ring *ring = &ag->rx_ring; struct ag71xx_ring *ring = &ag->rx_ring;
int offset = ag71xx_buffer_offset(ag);
int done = 0; int done = 0;
DBG("%s: rx packets, limit=%d, curr=%u, dirty=%u\n", DBG("%s: rx packets, limit=%d, curr=%u, dirty=%u\n",
@ -885,18 +881,25 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR); ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
skb = ring->buf[i].skb;
pktlen = ag71xx_desc_pktlen(desc); pktlen = ag71xx_desc_pktlen(desc);
pktlen -= ETH_FCS_LEN; pktlen -= ETH_FCS_LEN;
dma_unmap_single(&dev->dev, ring->buf[i].dma_addr, dma_unmap_single(&dev->dev, ring->buf[i].dma_addr,
AG71XX_RX_PKT_SIZE, DMA_FROM_DEVICE); AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
dev->last_rx = jiffies; dev->last_rx = jiffies;
dev->stats.rx_packets++; dev->stats.rx_packets++;
dev->stats.rx_bytes += pktlen; dev->stats.rx_bytes += pktlen;
skb = build_skb(ring->buf[i].rx_buf);
if (!skb) {
kfree(ring->buf[i].rx_buf);
goto next;
}
skb_reserve(skb, offset);
skb_put(skb, pktlen); skb_put(skb, pktlen);
if (ag71xx_has_ar8216(ag)) if (ag71xx_has_ar8216(ag))
err = ag71xx_remove_ar8216_header(ag, skb, pktlen); err = ag71xx_remove_ar8216_header(ag, skb, pktlen);
@ -910,7 +913,8 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
netif_receive_skb(skb); netif_receive_skb(skb);
} }
ring->buf[i].skb = NULL; next:
ring->buf[i].rx_buf = NULL;
done++; done++;
ring->curr++; ring->curr++;
@ -944,7 +948,7 @@ static int ag71xx_poll(struct napi_struct *napi, int limit)
ag71xx_debugfs_update_napi_stats(ag, rx_done, tx_done); ag71xx_debugfs_update_napi_stats(ag, rx_done, tx_done);
rx_ring = &ag->rx_ring; rx_ring = &ag->rx_ring;
if (rx_ring->buf[rx_ring->dirty % rx_ring->size].skb == NULL) if (rx_ring->buf[rx_ring->dirty % rx_ring->size].rx_buf == NULL)
goto oom; goto oom;
status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS); status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);