--- a/drivers/net/ethernet/cavium/cns3xxx_eth.c
+++ b/drivers/net/ethernet/cavium/cns3xxx_eth.c
@@ -26,15 +26,21 @@
 
 #define DRV_NAME "cns3xxx_eth"
 
-#define RX_DESCS 512
-#define TX_DESCS 512
-#define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
+#define RX_DESCS 128
+#define TX_DESCS 128
 
 #define RX_POOL_ALLOC_SIZE (sizeof(struct rx_desc) * RX_DESCS)
 #define TX_POOL_ALLOC_SIZE (sizeof(struct tx_desc) * TX_DESCS)
 #define REGS_SIZE 336
-#define MAX_MRU (1536 + SKB_DMA_REALIGN)
-#define CNS3XXX_MAX_MTU (1536)
+
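+/*
+ * RX buffers are raw kmalloc() chunks that are wrapped by build_skb() on
+ * receive. SKB_HEAD_ALIGN keeps NET_SKB_PAD + NET_IP_ALIGN of headroom
+ * while placing the frame (minus its NET_IP_ALIGN offset) on an
+ * RX_BUFFER_ALIGN boundary, presumably the cache line size on this SoC.
+ * RX_SEGMENT_MRU is what one buffer can carry once that headroom and the
+ * trailing struct skb_shared_info are subtracted.
+ */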
+#define RX_BUFFER_ALIGN 64
+#define RX_BUFFER_ALIGN_MASK (~(RX_BUFFER_ALIGN - 1))
+
+#define SKB_HEAD_ALIGN (((PAGE_SIZE - NET_SKB_PAD) % RX_BUFFER_ALIGN) + NET_SKB_PAD + NET_IP_ALIGN)
+#define RX_SEGMENT_ALLOC_SIZE 4096
+#define RX_SEGMENT_BUFSIZE (SKB_WITH_OVERHEAD(RX_SEGMENT_ALLOC_SIZE))
+#define RX_SEGMENT_MRU (((RX_SEGMENT_BUFSIZE - SKB_HEAD_ALIGN) & RX_BUFFER_ALIGN_MASK) - NET_IP_ALIGN)
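+
+/* Frames larger than one segment arrive as a chain of descriptors and are
+ * reassembled onto the skb frag list in eth_poll(). */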
+#define MAX_MTU	9500
 
 #define NAPI_WEIGHT 64
 
@@ -266,7 +272,7 @@ struct _rx_ring {
 	struct rx_desc *desc;
 	dma_addr_t phys_addr;
 	struct rx_desc *cur_addr;
-	struct sk_buff *buff_tab[RX_DESCS];
+	void *buff_tab[RX_DESCS];
 	unsigned int phys_tab[RX_DESCS];
 	u32 cur_index;
 	u32 alloc_index;
@@ -280,6 +286,8 @@ struct sw {
 	struct cns3xxx_plat_info *plat;
 	struct _tx_ring *tx_ring;
 	struct _rx_ring *rx_ring;
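+	/* partially reassembled multi-segment frame, carried across
+	 * eth_poll() iterations */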
+	struct sk_buff *frag_first;
+	struct sk_buff *frag_last;
 };
 
 struct port {
@@ -500,37 +508,35 @@ static void cns3xxx_alloc_rx_buf(struct 
 	struct _rx_ring *rx_ring = sw->rx_ring;
 	unsigned int i = rx_ring->alloc_index;
 	struct rx_desc *desc = &(rx_ring)->desc[i];
-	struct sk_buff *skb;
+	void *buf;
 	unsigned int phys;
 
 	for (received += rx_ring->alloc_count; received > 0; received--) {
-		if ((skb = dev_alloc_skb(MAX_MRU))) {
-			if (SKB_DMA_REALIGN)
-				skb_reserve(skb, SKB_DMA_REALIGN);
-			skb_reserve(skb, NET_IP_ALIGN);
-			phys = dma_map_single(NULL, skb->data,
-				    CNS3XXX_MAX_MTU, DMA_FROM_DEVICE);
-			if (dma_mapping_error(NULL, phys)) {
-				dev_kfree_skb(skb);
-				/* Failed to map, better luck next time */
-				goto out;;
-			}
-			desc->sdp = phys;
-		} else {
-			/* Failed to allocate skb, try again next time */
+		buf = kzalloc(RX_SEGMENT_ALLOC_SIZE, GFP_ATOMIC);
+		if (!buf)
+			goto out;
+
+		phys = dma_map_single(NULL, buf + SKB_HEAD_ALIGN,
+				      RX_SEGMENT_MRU, DMA_FROM_DEVICE);
+		if (dma_mapping_error(NULL, phys)) {
+			kfree(buf);
 			goto out;
 		}
 
+		desc->sdl = RX_SEGMENT_MRU;
+		desc->sdp = phys;
+
 		/* put the new buffer on RX-free queue */
-		rx_ring->buff_tab[i] = skb;
+		rx_ring->buff_tab[i] = buf;
 		rx_ring->phys_tab[i] = phys;
 		if (i == RX_DESCS - 1) {
 			i = 0;
 			desc->config0 = END_OF_RING | FIRST_SEGMENT |
-					LAST_SEGMENT | CNS3XXX_MAX_MTU;
+					LAST_SEGMENT | RX_SEGMENT_MRU;
 			desc = &(rx_ring)->desc[i];
 		} else {
-			desc->config0 = FIRST_SEGMENT | LAST_SEGMENT | CNS3XXX_MAX_MTU;
+			desc->config0 = FIRST_SEGMENT | LAST_SEGMENT |
+					RX_SEGMENT_MRU;
 			i++;
 			desc++;
 		}
@@ -579,7 +585,6 @@ static void clear_tx_desc(struct sw *sw)
 static int eth_poll(struct napi_struct *napi, int budget)
 {
 	struct sw *sw = container_of(napi, struct sw, napi);
-	struct net_device *dev;
 	struct _rx_ring *rx_ring = sw->rx_ring;
 	int received = 0;
 	unsigned int length;
@@ -588,49 +593,82 @@ static int eth_poll(struct napi_struct *
 
 	while (desc->cown) {
 		struct sk_buff *skb;
+		int reserve = SKB_HEAD_ALIGN;
 
 		if (received >= budget)
 			break;
 
-		skb = rx_ring->buff_tab[i];
+		/* process received frame */
+		dma_unmap_single(NULL, rx_ring->phys_tab[i],
+				 RX_SEGMENT_MRU, DMA_FROM_DEVICE);
+
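+		/* wrap the raw buffer in an skb without copying the data */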
+		skb = build_skb(rx_ring->buff_tab[i]);
+		if (!skb)
+			break;
 
-		dev = switch_port_tab[desc->sp]->netdev;
+		skb->dev = switch_port_tab[desc->sp]->netdev;
 
 		length = desc->sdl;
-		/* process received frame */
-		dma_unmap_single(&dev->dev, rx_ring->phys_tab[i],
-				 length, DMA_FROM_DEVICE);
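+		/* sdl is taken as-is only where it is known final; a
+		 * first-of-many segment is presumably filled to
+		 * RX_SEGMENT_MRU, and continuation segments give back the
+		 * NET_IP_ALIGN offset reserved on the head segment. */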
+		if (desc->fsd && !desc->lsd)
+			length = RX_SEGMENT_MRU;
+
+		if (!desc->fsd) {
+			reserve -= NET_IP_ALIGN;
+			if (!desc->lsd)
+				length += NET_IP_ALIGN;
+		}
 
+		skb_reserve(skb, reserve);
 		skb_put(skb, length);
 
-		skb->dev = dev;
-		skb->protocol = eth_type_trans(skb, dev);
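+		/* reassembly: the first segment becomes the head skb, later
+		 * segments are chained onto its frag list, and the head's
+		 * len/data_len/truesize grow to cover them */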
+		if (!sw->frag_first) {
+			sw->frag_first = skb;
+		} else {
+			if (sw->frag_first == sw->frag_last)
+				skb_frag_add_head(sw->frag_first, skb);
+			else
+				sw->frag_last->next = skb;
+			sw->frag_first->len += skb->len;
+			sw->frag_first->data_len += skb->len;
+			sw->frag_first->truesize += skb->truesize;
+		}
+		sw->frag_last = skb;
 
-		dev->stats.rx_packets++;
-		dev->stats.rx_bytes += length;
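+		/* last segment: the frame is complete, pass it up */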
+		if (desc->lsd) {
+			struct net_device *dev;
+
+			skb = sw->frag_first;
+			dev = skb->dev;
+			skb->protocol = eth_type_trans(skb, dev);
+
+			dev->stats.rx_packets++;
+			dev->stats.rx_bytes += skb->len;
+
+			/* RX Hardware checksum offload */
+			skb->ip_summed = CHECKSUM_NONE;
+			switch (desc->prot) {
+				case 1:
+				case 2:
+				case 5:
+				case 6:
+				case 13:
+				case 14:
+					if (desc->l4f)
+						break;
 
-		/* RX Hardware checksum offload */
-		switch (desc->prot) {
-			case 1:
-			case 2:
-			case 5:
-			case 6:
-			case 13:
-			case 14:
-				if (desc->l4f)
-					skb->ip_summed = CHECKSUM_NONE;
-				else
 					skb->ip_summed = CHECKSUM_UNNECESSARY;
-			break;
-			default:
-				skb->ip_summed = CHECKSUM_NONE;
-			break;
-		}
+					break;
+				default:
+					break;
+			}
 
-		napi_gro_receive(napi, skb);
+			napi_gro_receive(napi, skb);
 
-		received++;
+			sw->frag_first = NULL;
+			sw->frag_last = NULL;
+		}
 
+		received++;
 		if (++i == RX_DESCS) {
 			i = 0;
 			desc = &(rx_ring)->desc[i];
@@ -653,42 +691,60 @@ static int eth_poll(struct napi_struct *
 	return received;
 }
 
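+/*
+ * Fill one TX descriptor. config0 is written last, behind a barrier, so
+ * that sdp/pmap are visible before the descriptor looks complete;
+ * END_OF_RING and LAST_SEGMENT are folded in from the slot's position.
+ */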
+static void eth_set_desc(struct _tx_ring *tx_ring, int index, int index_last,
+			 void *data, int len, u32 config0, u32 pmap)
+{
+	struct tx_desc *tx_desc = &(tx_ring)->desc[index];
+	unsigned int phys;
+
+	phys = dma_map_single(NULL, data, len, DMA_TO_DEVICE);
+	tx_desc->sdp = phys;
+	tx_desc->pmap = pmap;
+	tx_ring->phys_tab[index] = phys;
+
+	config0 |= len;
+	if (index == TX_DESCS - 1)
+		config0 |= END_OF_RING;
+	if (index == index_last)
+		config0 |= LAST_SEGMENT;
+
+	mb();
+	tx_desc->config0 = config0;
+}
+
 static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct port *port = netdev_priv(dev);
 	struct sw *sw = port->sw;
 	struct _tx_ring *tx_ring = sw->tx_ring;
-	struct tx_desc *tx_desc;
-	int index;
-	int len;
+	struct sk_buff *skb1;
 	char pmap = (1 << port->id);
-	unsigned int phys;
 	int nr_frags = skb_shinfo(skb)->nr_frags;
-	struct skb_frag_struct *frag;
+	int nr_desc = nr_frags;
+	int index0, index, index_last;
+	int len0;
 	unsigned int i;
-	u32 config0 = 0;
+	u32 config0;
 
 	if (pmap == 8)
 		pmap = (1 << 4);
 
-	if (skb->len > CNS3XXX_MAX_MTU) {
-		dev_kfree_skb(skb);
-		dev->stats.tx_errors++;
-		return NETDEV_TX_OK;
-	}
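+	/* nr_desc already counts one descriptor per page fragment; add one
+	 * per frag-list skb (the linear head takes one more below) */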
+	skb_walk_frags(skb, skb1)
+		nr_desc++;
 
 	spin_lock(&tx_lock);
 
-	if ((tx_ring->num_used + nr_frags) >= TX_DESCS) {
+	if ((tx_ring->num_used + nr_desc + 1) >= TX_DESCS) {
 		clear_tx_desc(sw);
-		if ((tx_ring->num_used + nr_frags) >= TX_DESCS) {
+		if ((tx_ring->num_used + nr_desc + 1) >= TX_DESCS) {
 			spin_unlock(&tx_lock);
 			return NETDEV_TX_BUSY;
 		}
 	}
 
-	index = tx_ring->cur_index;
-	tx_ring->cur_index = ((tx_ring->cur_index + nr_frags + 1) % TX_DESCS);
+	index = index0 = tx_ring->cur_index;
+	index_last = (index0 + nr_desc) % TX_DESCS;
+	tx_ring->cur_index = (index_last + 1) % TX_DESCS;
 
 	spin_unlock(&tx_lock);
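+	/* descriptors index0..index_last are now reserved for this frame
+	 * and can be filled without holding tx_lock */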
 
@@ -696,79 +752,41 @@ static int eth_xmit(struct sk_buff *skb,
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		config0 |= UDP_CHECKSUM | TCP_CHECKSUM;
 
-	if (!nr_frags) {
-		tx_desc = &(tx_ring)->desc[index];
-
-		len = skb->len;
+	/* linear head length; page frags and frag-list skbs are all
+	 * accounted in skb->data_len */
+	len0 = skb_headlen(skb);
 
-		phys = dma_map_single(NULL, skb->data, len,
-				      	DMA_TO_DEVICE);
+	/* fragments */
+	for (i = 0; i < nr_frags; i++) {
+		struct skb_frag_struct *frag;
+		void *addr;
 
-		tx_desc->sdp = phys;
-		tx_desc->pmap = pmap;
-		tx_ring->phys_tab[index] = phys;
+		index = (index + 1) % TX_DESCS;
 
-		tx_ring->buff_tab[index] = skb;
-		config0 |= FIRST_SEGMENT | LAST_SEGMENT;
-	} else {
-		index = ((index + nr_frags) % TX_DESCS);
-		tx_desc = &(tx_ring)->desc[index];
+		frag = &skb_shinfo(skb)->frags[i];
+		addr = page_address(skb_frag_page(frag)) + frag->page_offset;
 
-		/* fragments */
-		for (i = nr_frags; i > 0; i--) {
-			u32 config;
-			void *addr;
-
-			frag = &skb_shinfo(skb)->frags[i-1];
-			len = frag->size;
-
-			addr = page_address(skb_frag_page(frag)) +
-			       frag->page_offset;
-			phys = dma_map_single(NULL, addr, len, DMA_TO_DEVICE);
-
-			tx_desc->sdp = phys;
-
-			tx_desc->pmap = pmap;
-			tx_ring->phys_tab[index] = phys;
-
-			config = config0 | len;
-			if (i == nr_frags) {
-				config |= LAST_SEGMENT;
-				tx_ring->buff_tab[index] = skb;
-			}
-			if (index == TX_DESCS - 1)
-				config |= END_OF_RING;
-			tx_desc->config0 = config;
-
-			if (index == 0) {
-				index = TX_DESCS - 1;
-				tx_desc = &(tx_ring)->desc[index];
-			} else {
-				index--;
-				tx_desc--;
-			}
-		}
+		eth_set_desc(tx_ring, index, index_last, addr, frag->size,
+			     config0, pmap);
+	}
 
-		/* header */
-		len = skb->len - skb->data_len;
 
-		phys = dma_map_single(NULL, skb->data, len, DMA_TO_DEVICE);
+	skb_walk_frags(skb, skb1) {
+		index = (index + 1) % TX_DESCS;
 
-		tx_desc->sdp = phys;
-		tx_desc->pmap = pmap;
-		tx_ring->phys_tab[index] = phys;
-		config0 |= FIRST_SEGMENT;
+		eth_set_desc(tx_ring, index, index_last, skb1->data, skb1->len,
+			     config0, pmap);
 	}
 
-	if (index == TX_DESCS - 1)
-		config0 |= END_OF_RING;
-
-	tx_desc->config0 = config0 | len;
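+	/* fill the head descriptor last: once FIRST_SEGMENT is in place the
+	 * switch may start walking the chain, so the other descriptors must
+	 * already be valid */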
+	tx_ring->buff_tab[index0] = skb;
+	eth_set_desc(tx_ring, index0, index_last, skb->data, len0,
+		     config0 | FIRST_SEGMENT, pmap);
 
 	mb();
 
 	spin_lock(&tx_lock);
-	tx_ring->num_used += nr_frags + 1;
+	tx_ring->num_used += nr_desc + 1;
 	spin_unlock(&tx_lock);
 
 	dev->stats.tx_packets++;
@@ -849,24 +867,24 @@ static int init_rings(struct sw *sw)
 	/* Setup RX buffers */
 	for (i = 0; i < RX_DESCS; i++) {
 		struct rx_desc *desc = &(rx_ring)->desc[i];
-		struct sk_buff *skb;
-		if (!(skb = dev_alloc_skb(MAX_MRU)))
+		void *buf;
+
+		buf = kzalloc(RX_SEGMENT_ALLOC_SIZE, GFP_KERNEL);
+		if (!buf)
 			return -ENOMEM;
-		if (SKB_DMA_REALIGN)
-			skb_reserve(skb, SKB_DMA_REALIGN);
-		skb_reserve(skb, NET_IP_ALIGN);
-		desc->sdl = CNS3XXX_MAX_MTU;
+
+		desc->sdl = RX_SEGMENT_MRU;
 		if (i == (RX_DESCS - 1))
 			desc->eor = 1;
 		desc->fsd = 1;
 		desc->lsd = 1;
 
-		desc->sdp = dma_map_single(NULL, skb->data,
-					    CNS3XXX_MAX_MTU, DMA_FROM_DEVICE);
-		if (dma_mapping_error(NULL, desc->sdp)) {
+		desc->sdp = dma_map_single(NULL, buf + SKB_HEAD_ALIGN,
+					   RX_SEGMENT_MRU, DMA_FROM_DEVICE);
+		if (dma_mapping_error(NULL, desc->sdp))
 			return -EIO;
-		}
-		rx_ring->buff_tab[i] = skb;
+
+		rx_ring->buff_tab[i] = buf;
 		rx_ring->phys_tab[i] = desc->sdp;
 		desc->cown = 0;
 	}
@@ -905,12 +923,13 @@ static void destroy_rings(struct sw *sw)
 			struct _rx_ring *rx_ring = sw->rx_ring;
 			struct rx_desc *desc = &(rx_ring)->desc[i];
-			struct sk_buff *skb = sw->rx_ring->buff_tab[i];
+			void *buf = sw->rx_ring->buff_tab[i];
-			if (skb) {
-				dma_unmap_single(NULL,
-						 desc->sdp,
-						 CNS3XXX_MAX_MTU, DMA_FROM_DEVICE);
-				dev_kfree_skb(skb);
-			}
+
+			if (!buf)
+				continue;
+
+			/* RX ring entries hold raw kmalloc buffers, not
+			 * skbs, so free them with kfree() */
+			dma_unmap_single(NULL, desc->sdp, RX_SEGMENT_MRU,
+					 DMA_FROM_DEVICE);
+			kfree(buf);
 		}
 		dma_pool_free(rx_dma_pool, sw->rx_ring->desc, sw->rx_ring->phys_addr);
 		dma_pool_destroy(rx_dma_pool);
@@ -1085,13 +1104,22 @@ static int eth_set_mac(struct net_device
 	return 0;
 }
 
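+/* eth_change_mtu() would cap the MTU at 1500; allow jumbo sizes instead */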
+static int cns3xxx_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu < 68 || new_mtu > MAX_MTU)
+		return -EINVAL;
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
 static const struct net_device_ops cns3xxx_netdev_ops = {
 	.ndo_open = eth_open,
 	.ndo_stop = eth_close,
 	.ndo_start_xmit = eth_xmit,
 	.ndo_set_rx_mode = eth_rx_mode,
 	.ndo_do_ioctl = eth_ioctl,
-	.ndo_change_mtu = eth_change_mtu,
+	.ndo_change_mtu = cns3xxx_change_mtu,
 	.ndo_set_mac_address = eth_set_mac,
 	.ndo_validate_addr = eth_validate_addr,
 };
@@ -1111,7 +1139,7 @@ static int __devinit eth_init_one(struct
 	if (!(napi_dev = alloc_etherdev(sizeof(struct sw))))
 		return -ENOMEM;
 	strcpy(napi_dev->name, "switch%d");
-	napi_dev->features = NETIF_F_IP_CSUM | NETIF_F_SG;
+	napi_dev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST;
 
 	SET_NETDEV_DEV(napi_dev, &pdev->dev);
 	sw = netdev_priv(napi_dev);
@@ -1124,6 +1152,10 @@ static int __devinit eth_init_one(struct
 		goto err_free;
 	}
 
+	temp = __raw_readl(&sw->regs->phy_auto_addr);
+	temp |= (3 << 30); /* maximum frame length: 9600 bytes */
+	__raw_writel(temp, &sw->regs->phy_auto_addr);
+
 	for (i = 0; i < 4; i++) {
 		temp = __raw_readl(&sw->regs->mac_cfg[i]);
 		temp |= (PORT_DISABLE);
@@ -1185,7 +1217,7 @@ static int __devinit eth_init_one(struct
 		dev->netdev_ops = &cns3xxx_netdev_ops;
 		dev->ethtool_ops = &cns3xxx_ethtool_ops;
 		dev->tx_queue_len = 1000;
-		dev->features = NETIF_F_IP_CSUM | NETIF_F_SG;
+		dev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST;
 
 		switch_port_tab[port->id] = port;
 		memcpy(dev->dev_addr, &plat->hwaddr[i], ETH_ALEN);