Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c	413
1 file changed, 267 insertions(+), 146 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index b715a55cccc4..308a7d0c277f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1090,7 +1090,7 @@ int dev_open(struct net_device *dev)
 	/*
 	 * Enable NET_DMA
 	 */
-	dmaengine_get();
+	net_dmaengine_get();
 
 	/*
 	 * Initialize multicasting status
@@ -1172,7 +1172,7 @@ int dev_close(struct net_device *dev)
 	/*
 	 * Shutdown NET_DMA
 	 */
-	dmaengine_put();
+	net_dmaengine_put();
 
 	return 0;
 }
@@ -1336,7 +1336,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct packet_type *ptype;
 
+#ifdef CONFIG_NET_CLS_ACT
+	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
+		net_timestamp(skb);
+#else
 	net_timestamp(skb);
+#endif
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1430,7 +1435,7 @@ void netif_device_detach(struct net_device *dev)
 {
 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
 	    netif_running(dev)) {
-		netif_stop_queue(dev);
+		netif_tx_stop_all_queues(dev);
 	}
 }
 EXPORT_SYMBOL(netif_device_detach);
@@ -1445,7 +1450,7 @@ void netif_device_attach(struct net_device *dev)
 {
 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
 	    netif_running(dev)) {
-		netif_wake_queue(dev);
+		netif_tx_wake_all_queues(dev);
 		__netdev_watchdog_up(dev);
 	}
 }
@@ -1457,7 +1462,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 		((features & NETIF_F_IP_CSUM) &&
 		 protocol == htons(ETH_P_IP)) ||
 		((features & NETIF_F_IPV6_CSUM) &&
-		 protocol == htons(ETH_P_IPV6)));
+		 protocol == htons(ETH_P_IPV6)) ||
+		((features & NETIF_F_FCOE_CRC) &&
+		 protocol == htons(ETH_P_FCOE)));
 }
 
 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
@@ -1534,7 +1541,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+		struct net_device *dev = skb->dev;
+		struct ethtool_drvinfo info = {};
+
+		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
+			dev->ethtool_ops->get_drvinfo(dev, &info);
+
+		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
+			"ip_summed=%d",
+		     info.driver, dev ? dev->features : 0L,
+		     skb->sk ? skb->sk->sk_route_caps : 0L,
+		     skb->len, skb->data_len, skb->ip_summed);
+
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 			return ERR_PTR(err);
@@ -1656,8 +1675,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int rc;
 
-	prefetch(&dev->netdev_ops->ndo_start_xmit);
 	if (likely(!skb->next)) {
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);
@@ -1669,13 +1688,27 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				goto gso;
 		}
 
-		return ops->ndo_start_xmit(skb, dev);
+		rc = ops->ndo_start_xmit(skb, dev);
+		/*
+		 * TODO: if skb_orphan() was called by
+		 * dev->hard_start_xmit() (for example, the unmodified
+		 * igb driver does that; bnx2 doesn't), then
+		 * skb_tx_software_timestamp() will be unable to send
+		 * back the time stamp.
+		 *
+		 * How can this be prevented? Always create another
+		 * reference to the socket before calling
+		 * dev->hard_start_xmit()? Prevent that skb_orphan()
+		 * does anything in dev->hard_start_xmit() by clearing
+		 * the skb destructor before the call and restoring it
+		 * afterwards, then doing the skb_orphan() ourselves?
+		 */
+		return rc;
 	}
 
 gso:
 	do {
 		struct sk_buff *nskb = skb->next;
-		int rc;
 
 		skb->next = nskb->next;
 		nskb->next = NULL;
@@ -1696,59 +1729,24 @@ out_kfree_skb:
 	return 0;
 }
 
-static u32 simple_tx_hashrnd;
-static int simple_tx_hashrnd_initialized = 0;
+static u32 skb_tx_hashrnd;
 
-static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
-	u32 addr1, addr2, ports;
-	u32 hash, ihl;
-	u8 ip_proto = 0;
-
-	if (unlikely(!simple_tx_hashrnd_initialized)) {
-		get_random_bytes(&simple_tx_hashrnd, 4);
-		simple_tx_hashrnd_initialized = 1;
-	}
-
-	switch (skb->protocol) {
-	case htons(ETH_P_IP):
-		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
-			ip_proto = ip_hdr(skb)->protocol;
-		addr1 = ip_hdr(skb)->saddr;
-		addr2 = ip_hdr(skb)->daddr;
-		ihl = ip_hdr(skb)->ihl;
-		break;
-	case htons(ETH_P_IPV6):
-		ip_proto = ipv6_hdr(skb)->nexthdr;
-		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
-		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
-		ihl = (40 >> 2);
-		break;
-	default:
-		return 0;
-	}
+	u32 hash;
 
+	if (skb_rx_queue_recorded(skb)) {
+		hash = skb_get_rx_queue(skb);
+	} else if (skb->sk && skb->sk->sk_hash) {
+		hash = skb->sk->sk_hash;
+	} else
+		hash = skb->protocol;
 
-	switch (ip_proto) {
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-	case IPPROTO_DCCP:
-	case IPPROTO_ESP:
-	case IPPROTO_AH:
-	case IPPROTO_SCTP:
-	case IPPROTO_UDPLITE:
-		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
-		break;
-
-	default:
-		ports = 0;
-		break;
-	}
-
-	hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+	hash = jhash_1word(hash, skb_tx_hashrnd);
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
+EXPORT_SYMBOL(skb_tx_hash);
 
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
@@ -1759,7 +1757,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	if (ops->ndo_select_queue)
 		queue_index = ops->ndo_select_queue(dev, skb);
 	else if (dev->real_num_tx_queues > 1)
-		queue_index = simple_tx_hash(dev, skb);
+		queue_index = skb_tx_hash(dev, skb);
 
 	skb_set_queue_mapping(skb, queue_index);
 	return netdev_get_tx_queue(dev, queue_index);
@@ -2255,12 +2253,6 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	rcu_read_lock();
 
-	/* Don't receive packets in an exiting network namespace */
-	if (!net_alive(dev_net(skb->dev))) {
-		kfree_skb(skb);
-		goto out;
-	}
-
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -2291,6 +2283,8 @@ ncls:
 	if (!skb)
 		goto out;
 
+	skb_orphan(skb);
+
 	type = skb->protocol;
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2339,8 +2333,10 @@ static int napi_gro_complete(struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int err = -ENOENT;
 
-	if (NAPI_GRO_CB(skb)->count == 1)
+	if (NAPI_GRO_CB(skb)->count == 1) {
+		skb_shinfo(skb)->gso_size = 0;
 		goto out;
+	}
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -2359,8 +2355,6 @@ static int napi_gro_complete(struct sk_buff *skb)
 	}
 
 out:
-	skb_shinfo(skb)->gso_size = 0;
-	__skb_push(skb, -skb_network_offset(skb));
 	return netif_receive_skb(skb);
 }
 
@@ -2374,50 +2368,59 @@ void napi_gro_flush(struct napi_struct *napi)
 		napi_gro_complete(skb);
 	}
 
+	napi->gro_count = 0;
 	napi->gro_list = NULL;
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
+void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
+{
+	unsigned int offset = skb_gro_offset(skb);
+
+	hlen += offset;
+	if (hlen <= skb_headlen(skb))
+		return skb->data + offset;
+
+	if (unlikely(!skb_shinfo(skb)->nr_frags ||
+		     skb_shinfo(skb)->frags[0].size <=
+		     hlen - skb_headlen(skb) ||
+		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
+		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+
+	return page_address(skb_shinfo(skb)->frags[0].page) +
+	       skb_shinfo(skb)->frags[0].page_offset +
+	       offset - skb_headlen(skb);
+}
+EXPORT_SYMBOL(skb_gro_header);
+
 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
 	struct packet_type *ptype;
 	__be16 type = skb->protocol;
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
-	int count = 0;
 	int same_flow;
 	int mac_len;
-	int free;
+	int ret;
 
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
 
+	if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
+		goto normal;
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		struct sk_buff *p;
-
 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
 			continue;
 
-		skb_reset_network_header(skb);
+		skb_set_network_header(skb, skb_gro_offset(skb));
 		mac_len = skb->network_header - skb->mac_header;
 		skb->mac_len = mac_len;
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 
-		for (p = napi->gro_list; p; p = p->next) {
-			count++;
-
-			if (!NAPI_GRO_CB(p)->same_flow)
-				continue;
-
-			if (p->mac_len != mac_len ||
-			    memcmp(skb_mac_header(p), skb_mac_header(skb),
-				   mac_len))
-				NAPI_GRO_CB(p)->same_flow = 0;
-		}
-
 		pp = ptype->gro_receive(&napi->gro_list, skb);
 		break;
 	}
@@ -2427,7 +2430,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		goto normal;
 
 	same_flow = NAPI_GRO_CB(skb)->same_flow;
-	free = NAPI_GRO_CB(skb)->free;
+	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
 	if (pp) {
 		struct sk_buff *nskb = *pp;
@@ -2435,27 +2438,35 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		*pp = nskb->next;
 		nskb->next = NULL;
 		napi_gro_complete(nskb);
-		count--;
+		napi->gro_count--;
 	}
 
 	if (same_flow)
 		goto ok;
 
-	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
-		__skb_push(skb, -skb_network_offset(skb));
+	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
 		goto normal;
-	}
 
+	napi->gro_count++;
 	NAPI_GRO_CB(skb)->count = 1;
-	skb_shinfo(skb)->gso_size = skb->len;
+	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 	skb->next = napi->gro_list;
 	napi->gro_list = skb;
+	ret = GRO_HELD;
+
+pull:
+	if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
+		if (napi->gro_list == skb)
+			napi->gro_list = skb->next;
+		ret = GRO_DROP;
+	}
 
 ok:
-	return free;
+	return ret;
 
 normal:
-	return -1;
+	ret = GRO_NORMAL;
+	goto pull;
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
@@ -2463,37 +2474,50 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
+	if (netpoll_rx_on(skb))
+		return GRO_NORMAL;
+
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = 1;
+		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
+			&& !compare_ether_header(skb_mac_header(p),
+						 skb_gro_mac_header(skb));
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
 	return dev_gro_receive(napi, skb);
 }
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+int napi_skb_finish(int ret, struct sk_buff *skb)
 {
-	switch (__napi_gro_receive(napi, skb)) {
-	case -1:
+	int err = NET_RX_SUCCESS;
+
+	switch (ret) {
+	case GRO_NORMAL:
 		return netif_receive_skb(skb);
 
-	case 1:
+	case GRO_DROP:
+		err = NET_RX_DROP;
+		/* fall through */
+
+	case GRO_MERGED_FREE:
 		kfree_skb(skb);
 		break;
 	}
 
-	return NET_RX_SUCCESS;
+	return err;
 }
-EXPORT_SYMBOL(napi_gro_receive);
+EXPORT_SYMBOL(napi_skb_finish);
 
-void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-	skb_shinfo(skb)->nr_frags = 0;
+	skb_gro_reset_offset(skb);
 
-	skb->len -= skb->data_len;
-	skb->truesize -= skb->data_len;
-	skb->data_len = 0;
+	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
 
+void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+{
 	__skb_pull(skb, skb_headlen(skb));
 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
 
@@ -2506,6 +2530,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 {
 	struct net_device *dev = napi->dev;
 	struct sk_buff *skb = napi->skb;
+	struct ethhdr *eth;
+	skb_frag_t *frag;
+	int i;
 
 	napi->skb = NULL;
 
@@ -2518,19 +2545,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 	}
 
 	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
+	frag = info->frags;
+
+	for (i = 0; i < info->nr_frags; i++) {
+		skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
+				   frag->size);
+		frag++;
+	}
 	skb_shinfo(skb)->nr_frags = info->nr_frags;
-	memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
 
 	skb->data_len = info->len;
 	skb->len += info->len;
 	skb->truesize += info->len;
 
-	if (!pskb_may_pull(skb, ETH_HLEN)) {
+	skb_reset_mac_header(skb);
+	skb_gro_reset_offset(skb);
+
+	eth = skb_gro_header(skb, sizeof(*eth));
+	if (!eth) {
 		napi_reuse_skb(napi, skb);
+		skb = NULL;
 		goto out;
 	}
 
-	skb->protocol = eth_type_trans(skb, dev);
+	skb_gro_pull(skb, sizeof(*eth));
+
+	/*
+	 * This works because the only protocols we care about don't require
+	 * special handling.  We'll fix it up properly at the end.
+	 */
+	skb->protocol = eth->h_proto;
 
 	skb->ip_summed = info->ip_summed;
 	skb->csum = info->csum;
@@ -2540,29 +2584,43 @@ out:
 }
 EXPORT_SYMBOL(napi_fraginfo_skb);
 
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 {
-	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
-	int err = NET_RX_DROP;
+	int err = NET_RX_SUCCESS;
 
-	if (!skb)
-		goto out;
+	switch (ret) {
+	case GRO_NORMAL:
+	case GRO_HELD:
+		skb->protocol = eth_type_trans(skb, napi->dev);
 
-	err = NET_RX_SUCCESS;
+		if (ret == GRO_NORMAL)
+			return netif_receive_skb(skb);
 
-	switch (__napi_gro_receive(napi, skb)) {
-	case -1:
-		return netif_receive_skb(skb);
+		skb_gro_pull(skb, -ETH_HLEN);
+		break;
 
-	case 0:
-		goto out;
-	}
+	case GRO_DROP:
+		err = NET_RX_DROP;
+		/* fall through */
 
-	napi_reuse_skb(napi, skb);
+	case GRO_MERGED_FREE:
+		napi_reuse_skb(napi, skb);
+		break;
+	}
 
-out:
 	return err;
 }
+EXPORT_SYMBOL(napi_frags_finish);
+
+int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+{
+	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+
+	if (!skb)
+		return NET_RX_DROP;
+
+	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+}
 EXPORT_SYMBOL(napi_gro_frags);
 
 static int process_backlog(struct napi_struct *napi, int quota)
@@ -2584,11 +2642,9 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		}
 		local_irq_enable();
 
-		napi_gro_receive(napi, skb);
+		netif_receive_skb(skb);
 	} while (++work < quota && jiffies == start_time);
 
-	napi_gro_flush(napi);
-
 	return work;
 }
 
@@ -2642,6 +2698,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
 	INIT_LIST_HEAD(&napi->poll_list);
+	napi->gro_count = 0;
 	napi->gro_list = NULL;
 	napi->skb = NULL;
 	napi->poll = poll;
@@ -2661,7 +2718,7 @@ void netif_napi_del(struct napi_struct *napi)
 	struct sk_buff *skb, *next;
 
 	list_del_init(&napi->dev_list);
-	kfree(napi->skb);
+	kfree_skb(napi->skb);
 
 	for (skb = napi->gro_list; skb; skb = next) {
 		next = skb->next;
@@ -2670,6 +2727,7 @@ void netif_napi_del(struct napi_struct *napi)
 	}
 
 	napi->gro_list = NULL;
+	napi->gro_count = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -3938,6 +3996,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 		    cmd == SIOCSMIIREG ||
 		    cmd == SIOCBRADDIF ||
 		    cmd == SIOCBRDELIF ||
+		    cmd == SIOCSHWTSTAMP ||
 		    cmd == SIOCWANDEV) {
 			err = -EOPNOTSUPP;
 			if (ops->ndo_do_ioctl) {
@@ -4092,6 +4151,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCBONDCHANGEACTIVE:
 	case SIOCBRADDIF:
 	case SIOCBRDELIF:
+	case SIOCSHWTSTAMP:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 		/* fall through */
@@ -4272,6 +4332,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 }
 EXPORT_SYMBOL(netdev_fix_features);
 
+/* Some devices need to (re-)set their netdev_ops inside
+ * ->init() or similar.  If that happens, we have to setup
+ * the compat pointers again.
+ */
+void netdev_resync_ops(struct net_device *dev)
+{
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	dev->init = ops->ndo_init;
+	dev->uninit = ops->ndo_uninit;
+	dev->open = ops->ndo_open;
+	dev->change_rx_flags = ops->ndo_change_rx_flags;
+	dev->set_rx_mode = ops->ndo_set_rx_mode;
+	dev->set_multicast_list = ops->ndo_set_multicast_list;
+	dev->set_mac_address = ops->ndo_set_mac_address;
+	dev->validate_addr = ops->ndo_validate_addr;
+	dev->do_ioctl = ops->ndo_do_ioctl;
+	dev->set_config = ops->ndo_set_config;
+	dev->change_mtu = ops->ndo_change_mtu;
+	dev->neigh_setup = ops->ndo_neigh_setup;
+	dev->tx_timeout = ops->ndo_tx_timeout;
+	dev->get_stats = ops->ndo_get_stats;
+	dev->vlan_rx_register = ops->ndo_vlan_rx_register;
+	dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
+	dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	dev->poll_controller = ops->ndo_poll_controller;
+#endif
+#endif
+}
+EXPORT_SYMBOL(netdev_resync_ops);
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
@@ -4312,38 +4405,18 @@ int register_netdevice(struct net_device *dev)
 	dev->iflink = -1;
 
 #ifdef CONFIG_COMPAT_NET_DEV_OPS
-	/* Netdevice_ops API compatiability support.
+	/* Netdevice_ops API compatibility support.
 	 * This is temporary until all network devices are converted.
 	 */
 	if (dev->netdev_ops) {
-		const struct net_device_ops *ops = dev->netdev_ops;
-
-		dev->init = ops->ndo_init;
-		dev->uninit = ops->ndo_uninit;
-		dev->open = ops->ndo_open;
-		dev->change_rx_flags = ops->ndo_change_rx_flags;
-		dev->set_rx_mode = ops->ndo_set_rx_mode;
-		dev->set_multicast_list = ops->ndo_set_multicast_list;
-		dev->set_mac_address = ops->ndo_set_mac_address;
-		dev->validate_addr = ops->ndo_validate_addr;
-		dev->do_ioctl = ops->ndo_do_ioctl;
-		dev->set_config = ops->ndo_set_config;
-		dev->change_mtu = ops->ndo_change_mtu;
-		dev->tx_timeout = ops->ndo_tx_timeout;
-		dev->get_stats = ops->ndo_get_stats;
-		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
-		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
-		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-		dev->poll_controller = ops->ndo_poll_controller;
-#endif
+		netdev_resync_ops(dev);
 	} else {
 		char drivername[64];
 		pr_info("%s (%s): not using net_device_ops yet\n",
 			dev->name, netdev_drivername(dev, drivername, 64));
 
 		/* This works only because net_device_ops and the
-		   compatiablity structure are the same. */
+		   compatibility structure are the same. */
 		dev->netdev_ops = (void *) &(dev->init);
 	}
 #endif
@@ -4434,6 +4507,45 @@ err_uninit:
 }
 
 /**
+ *	init_dummy_netdev	- init a dummy network device for NAPI
+ *	@dev: device to init
+ *
+ *	This takes a network device structure and initializes the minimum
+ *	amount of fields so it can be used to schedule NAPI polls without
+ *	registering a full blown interface. This is to be used by drivers
+ *	that need to tie several hardware interfaces to a single NAPI
+ *	poll scheduler due to HW limitations.
+ */
+int init_dummy_netdev(struct net_device *dev)
+{
+	/* Clear everything. Note we don't initialize spinlocks
+	 * as they aren't supposed to be taken by any of the
+	 * NAPI code and this dummy netdev is supposed to be
+	 * only ever used for NAPI polls
+	 */
+	memset(dev, 0, sizeof(struct net_device));
+
+	/* make sure we BUG if trying to hit standard
+	 * register/unregister code path
+	 */
+	dev->reg_state = NETREG_DUMMY;
+
+	/* initialize the ref count */
+	atomic_set(&dev->refcnt, 1);
+
+	/* NAPI wants this */
+	INIT_LIST_HEAD(&dev->napi_list);
+
+	/* a dummy interface is started by default */
+	set_bit(__LINK_STATE_PRESENT, &dev->state);
+	set_bit(__LINK_STATE_START, &dev->state);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(init_dummy_netdev);
+
+
+/**
  *	register_netdev	- register a network device
  *	@dev: device to register
  *
@@ -5136,6 +5248,7 @@ static int __init net_dev_init(void)
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
 		queue->backlog.gro_list = NULL;
+		queue->backlog.gro_count = 0;
 	}
 
 	dev_boot_phase = 0;
@@ -5168,6 +5281,14 @@ out:
 
 subsys_initcall(net_dev_init);
 
+static int __init initialize_hashrnd(void)
+{
+	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+	return 0;
+}
+
+late_initcall_sync(initialize_hashrnd);
+
 EXPORT_SYMBOL(__dev_get_by_index);
 EXPORT_SYMBOL(__dev_get_by_name);
 EXPORT_SYMBOL(__dev_remove_pack);