diff options
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 413 |
1 files changed, 267 insertions, 146 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index b715a55cccc4..308a7d0c277f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -1090,7 +1090,7 @@ int dev_open(struct net_device *dev) | |||
1090 | /* | 1090 | /* |
1091 | * Enable NET_DMA | 1091 | * Enable NET_DMA |
1092 | */ | 1092 | */ |
1093 | dmaengine_get(); | 1093 | net_dmaengine_get(); |
1094 | 1094 | ||
1095 | /* | 1095 | /* |
1096 | * Initialize multicasting status | 1096 | * Initialize multicasting status |
@@ -1172,7 +1172,7 @@ int dev_close(struct net_device *dev) | |||
1172 | /* | 1172 | /* |
1173 | * Shutdown NET_DMA | 1173 | * Shutdown NET_DMA |
1174 | */ | 1174 | */ |
1175 | dmaengine_put(); | 1175 | net_dmaengine_put(); |
1176 | 1176 | ||
1177 | return 0; | 1177 | return 0; |
1178 | } | 1178 | } |
@@ -1336,7 +1336,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1336 | { | 1336 | { |
1337 | struct packet_type *ptype; | 1337 | struct packet_type *ptype; |
1338 | 1338 | ||
1339 | #ifdef CONFIG_NET_CLS_ACT | ||
1340 | if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) | ||
1341 | net_timestamp(skb); | ||
1342 | #else | ||
1339 | net_timestamp(skb); | 1343 | net_timestamp(skb); |
1344 | #endif | ||
1340 | 1345 | ||
1341 | rcu_read_lock(); | 1346 | rcu_read_lock(); |
1342 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 1347 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
@@ -1430,7 +1435,7 @@ void netif_device_detach(struct net_device *dev) | |||
1430 | { | 1435 | { |
1431 | if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && | 1436 | if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && |
1432 | netif_running(dev)) { | 1437 | netif_running(dev)) { |
1433 | netif_stop_queue(dev); | 1438 | netif_tx_stop_all_queues(dev); |
1434 | } | 1439 | } |
1435 | } | 1440 | } |
1436 | EXPORT_SYMBOL(netif_device_detach); | 1441 | EXPORT_SYMBOL(netif_device_detach); |
@@ -1445,7 +1450,7 @@ void netif_device_attach(struct net_device *dev) | |||
1445 | { | 1450 | { |
1446 | if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && | 1451 | if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && |
1447 | netif_running(dev)) { | 1452 | netif_running(dev)) { |
1448 | netif_wake_queue(dev); | 1453 | netif_tx_wake_all_queues(dev); |
1449 | __netdev_watchdog_up(dev); | 1454 | __netdev_watchdog_up(dev); |
1450 | } | 1455 | } |
1451 | } | 1456 | } |
@@ -1457,7 +1462,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) | |||
1457 | ((features & NETIF_F_IP_CSUM) && | 1462 | ((features & NETIF_F_IP_CSUM) && |
1458 | protocol == htons(ETH_P_IP)) || | 1463 | protocol == htons(ETH_P_IP)) || |
1459 | ((features & NETIF_F_IPV6_CSUM) && | 1464 | ((features & NETIF_F_IPV6_CSUM) && |
1460 | protocol == htons(ETH_P_IPV6))); | 1465 | protocol == htons(ETH_P_IPV6)) || |
1466 | ((features & NETIF_F_FCOE_CRC) && | ||
1467 | protocol == htons(ETH_P_FCOE))); | ||
1461 | } | 1468 | } |
1462 | 1469 | ||
1463 | static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) | 1470 | static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) |
@@ -1534,7 +1541,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | |||
1534 | skb->mac_len = skb->network_header - skb->mac_header; | 1541 | skb->mac_len = skb->network_header - skb->mac_header; |
1535 | __skb_pull(skb, skb->mac_len); | 1542 | __skb_pull(skb, skb->mac_len); |
1536 | 1543 | ||
1537 | if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { | 1544 | if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { |
1545 | struct net_device *dev = skb->dev; | ||
1546 | struct ethtool_drvinfo info = {}; | ||
1547 | |||
1548 | if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) | ||
1549 | dev->ethtool_ops->get_drvinfo(dev, &info); | ||
1550 | |||
1551 | WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " | ||
1552 | "ip_summed=%d", | ||
1553 | info.driver, dev ? dev->features : 0L, | ||
1554 | skb->sk ? skb->sk->sk_route_caps : 0L, | ||
1555 | skb->len, skb->data_len, skb->ip_summed); | ||
1556 | |||
1538 | if (skb_header_cloned(skb) && | 1557 | if (skb_header_cloned(skb) && |
1539 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) | 1558 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
1540 | return ERR_PTR(err); | 1559 | return ERR_PTR(err); |
@@ -1656,8 +1675,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1656 | struct netdev_queue *txq) | 1675 | struct netdev_queue *txq) |
1657 | { | 1676 | { |
1658 | const struct net_device_ops *ops = dev->netdev_ops; | 1677 | const struct net_device_ops *ops = dev->netdev_ops; |
1678 | int rc; | ||
1659 | 1679 | ||
1660 | prefetch(&dev->netdev_ops->ndo_start_xmit); | ||
1661 | if (likely(!skb->next)) { | 1680 | if (likely(!skb->next)) { |
1662 | if (!list_empty(&ptype_all)) | 1681 | if (!list_empty(&ptype_all)) |
1663 | dev_queue_xmit_nit(skb, dev); | 1682 | dev_queue_xmit_nit(skb, dev); |
@@ -1669,13 +1688,27 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1669 | goto gso; | 1688 | goto gso; |
1670 | } | 1689 | } |
1671 | 1690 | ||
1672 | return ops->ndo_start_xmit(skb, dev); | 1691 | rc = ops->ndo_start_xmit(skb, dev); |
1692 | /* | ||
1693 | * TODO: if skb_orphan() was called by | ||
1694 | * dev->hard_start_xmit() (for example, the unmodified | ||
1695 | * igb driver does that; bnx2 doesn't), then | ||
1696 | * skb_tx_software_timestamp() will be unable to send | ||
1697 | * back the time stamp. | ||
1698 | * | ||
1699 | * How can this be prevented? Always create another | ||
1700 | * reference to the socket before calling | ||
1701 | * dev->hard_start_xmit()? Prevent that skb_orphan() | ||
1702 | * does anything in dev->hard_start_xmit() by clearing | ||
1703 | * the skb destructor before the call and restoring it | ||
1704 | * afterwards, then doing the skb_orphan() ourselves? | ||
1705 | */ | ||
1706 | return rc; | ||
1673 | } | 1707 | } |
1674 | 1708 | ||
1675 | gso: | 1709 | gso: |
1676 | do { | 1710 | do { |
1677 | struct sk_buff *nskb = skb->next; | 1711 | struct sk_buff *nskb = skb->next; |
1678 | int rc; | ||
1679 | 1712 | ||
1680 | skb->next = nskb->next; | 1713 | skb->next = nskb->next; |
1681 | nskb->next = NULL; | 1714 | nskb->next = NULL; |
@@ -1696,59 +1729,24 @@ out_kfree_skb: | |||
1696 | return 0; | 1729 | return 0; |
1697 | } | 1730 | } |
1698 | 1731 | ||
1699 | static u32 simple_tx_hashrnd; | 1732 | static u32 skb_tx_hashrnd; |
1700 | static int simple_tx_hashrnd_initialized = 0; | ||
1701 | 1733 | ||
1702 | static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) | 1734 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) |
1703 | { | 1735 | { |
1704 | u32 addr1, addr2, ports; | 1736 | u32 hash; |
1705 | u32 hash, ihl; | ||
1706 | u8 ip_proto = 0; | ||
1707 | |||
1708 | if (unlikely(!simple_tx_hashrnd_initialized)) { | ||
1709 | get_random_bytes(&simple_tx_hashrnd, 4); | ||
1710 | simple_tx_hashrnd_initialized = 1; | ||
1711 | } | ||
1712 | |||
1713 | switch (skb->protocol) { | ||
1714 | case htons(ETH_P_IP): | ||
1715 | if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) | ||
1716 | ip_proto = ip_hdr(skb)->protocol; | ||
1717 | addr1 = ip_hdr(skb)->saddr; | ||
1718 | addr2 = ip_hdr(skb)->daddr; | ||
1719 | ihl = ip_hdr(skb)->ihl; | ||
1720 | break; | ||
1721 | case htons(ETH_P_IPV6): | ||
1722 | ip_proto = ipv6_hdr(skb)->nexthdr; | ||
1723 | addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; | ||
1724 | addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; | ||
1725 | ihl = (40 >> 2); | ||
1726 | break; | ||
1727 | default: | ||
1728 | return 0; | ||
1729 | } | ||
1730 | 1737 | ||
1738 | if (skb_rx_queue_recorded(skb)) { | ||
1739 | hash = skb_get_rx_queue(skb); | ||
1740 | } else if (skb->sk && skb->sk->sk_hash) { | ||
1741 | hash = skb->sk->sk_hash; | ||
1742 | } else | ||
1743 | hash = skb->protocol; | ||
1731 | 1744 | ||
1732 | switch (ip_proto) { | 1745 | hash = jhash_1word(hash, skb_tx_hashrnd); |
1733 | case IPPROTO_TCP: | ||
1734 | case IPPROTO_UDP: | ||
1735 | case IPPROTO_DCCP: | ||
1736 | case IPPROTO_ESP: | ||
1737 | case IPPROTO_AH: | ||
1738 | case IPPROTO_SCTP: | ||
1739 | case IPPROTO_UDPLITE: | ||
1740 | ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); | ||
1741 | break; | ||
1742 | |||
1743 | default: | ||
1744 | ports = 0; | ||
1745 | break; | ||
1746 | } | ||
1747 | |||
1748 | hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); | ||
1749 | 1746 | ||
1750 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); | 1747 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); |
1751 | } | 1748 | } |
1749 | EXPORT_SYMBOL(skb_tx_hash); | ||
1752 | 1750 | ||
1753 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, | 1751 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, |
1754 | struct sk_buff *skb) | 1752 | struct sk_buff *skb) |
@@ -1759,7 +1757,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, | |||
1759 | if (ops->ndo_select_queue) | 1757 | if (ops->ndo_select_queue) |
1760 | queue_index = ops->ndo_select_queue(dev, skb); | 1758 | queue_index = ops->ndo_select_queue(dev, skb); |
1761 | else if (dev->real_num_tx_queues > 1) | 1759 | else if (dev->real_num_tx_queues > 1) |
1762 | queue_index = simple_tx_hash(dev, skb); | 1760 | queue_index = skb_tx_hash(dev, skb); |
1763 | 1761 | ||
1764 | skb_set_queue_mapping(skb, queue_index); | 1762 | skb_set_queue_mapping(skb, queue_index); |
1765 | return netdev_get_tx_queue(dev, queue_index); | 1763 | return netdev_get_tx_queue(dev, queue_index); |
@@ -2255,12 +2253,6 @@ int netif_receive_skb(struct sk_buff *skb) | |||
2255 | 2253 | ||
2256 | rcu_read_lock(); | 2254 | rcu_read_lock(); |
2257 | 2255 | ||
2258 | /* Don't receive packets in an exiting network namespace */ | ||
2259 | if (!net_alive(dev_net(skb->dev))) { | ||
2260 | kfree_skb(skb); | ||
2261 | goto out; | ||
2262 | } | ||
2263 | |||
2264 | #ifdef CONFIG_NET_CLS_ACT | 2256 | #ifdef CONFIG_NET_CLS_ACT |
2265 | if (skb->tc_verd & TC_NCLS) { | 2257 | if (skb->tc_verd & TC_NCLS) { |
2266 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | 2258 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
@@ -2291,6 +2283,8 @@ ncls: | |||
2291 | if (!skb) | 2283 | if (!skb) |
2292 | goto out; | 2284 | goto out; |
2293 | 2285 | ||
2286 | skb_orphan(skb); | ||
2287 | |||
2294 | type = skb->protocol; | 2288 | type = skb->protocol; |
2295 | list_for_each_entry_rcu(ptype, | 2289 | list_for_each_entry_rcu(ptype, |
2296 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { | 2290 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { |
@@ -2339,8 +2333,10 @@ static int napi_gro_complete(struct sk_buff *skb) | |||
2339 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; | 2333 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; |
2340 | int err = -ENOENT; | 2334 | int err = -ENOENT; |
2341 | 2335 | ||
2342 | if (NAPI_GRO_CB(skb)->count == 1) | 2336 | if (NAPI_GRO_CB(skb)->count == 1) { |
2337 | skb_shinfo(skb)->gso_size = 0; | ||
2343 | goto out; | 2338 | goto out; |
2339 | } | ||
2344 | 2340 | ||
2345 | rcu_read_lock(); | 2341 | rcu_read_lock(); |
2346 | list_for_each_entry_rcu(ptype, head, list) { | 2342 | list_for_each_entry_rcu(ptype, head, list) { |
@@ -2359,8 +2355,6 @@ static int napi_gro_complete(struct sk_buff *skb) | |||
2359 | } | 2355 | } |
2360 | 2356 | ||
2361 | out: | 2357 | out: |
2362 | skb_shinfo(skb)->gso_size = 0; | ||
2363 | __skb_push(skb, -skb_network_offset(skb)); | ||
2364 | return netif_receive_skb(skb); | 2358 | return netif_receive_skb(skb); |
2365 | } | 2359 | } |
2366 | 2360 | ||
@@ -2374,50 +2368,59 @@ void napi_gro_flush(struct napi_struct *napi) | |||
2374 | napi_gro_complete(skb); | 2368 | napi_gro_complete(skb); |
2375 | } | 2369 | } |
2376 | 2370 | ||
2371 | napi->gro_count = 0; | ||
2377 | napi->gro_list = NULL; | 2372 | napi->gro_list = NULL; |
2378 | } | 2373 | } |
2379 | EXPORT_SYMBOL(napi_gro_flush); | 2374 | EXPORT_SYMBOL(napi_gro_flush); |
2380 | 2375 | ||
2376 | void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) | ||
2377 | { | ||
2378 | unsigned int offset = skb_gro_offset(skb); | ||
2379 | |||
2380 | hlen += offset; | ||
2381 | if (hlen <= skb_headlen(skb)) | ||
2382 | return skb->data + offset; | ||
2383 | |||
2384 | if (unlikely(!skb_shinfo(skb)->nr_frags || | ||
2385 | skb_shinfo(skb)->frags[0].size <= | ||
2386 | hlen - skb_headlen(skb) || | ||
2387 | PageHighMem(skb_shinfo(skb)->frags[0].page))) | ||
2388 | return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; | ||
2389 | |||
2390 | return page_address(skb_shinfo(skb)->frags[0].page) + | ||
2391 | skb_shinfo(skb)->frags[0].page_offset + | ||
2392 | offset - skb_headlen(skb); | ||
2393 | } | ||
2394 | EXPORT_SYMBOL(skb_gro_header); | ||
2395 | |||
2381 | int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 2396 | int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
2382 | { | 2397 | { |
2383 | struct sk_buff **pp = NULL; | 2398 | struct sk_buff **pp = NULL; |
2384 | struct packet_type *ptype; | 2399 | struct packet_type *ptype; |
2385 | __be16 type = skb->protocol; | 2400 | __be16 type = skb->protocol; |
2386 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; | 2401 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; |
2387 | int count = 0; | ||
2388 | int same_flow; | 2402 | int same_flow; |
2389 | int mac_len; | 2403 | int mac_len; |
2390 | int free; | 2404 | int ret; |
2391 | 2405 | ||
2392 | if (!(skb->dev->features & NETIF_F_GRO)) | 2406 | if (!(skb->dev->features & NETIF_F_GRO)) |
2393 | goto normal; | 2407 | goto normal; |
2394 | 2408 | ||
2409 | if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list) | ||
2410 | goto normal; | ||
2411 | |||
2395 | rcu_read_lock(); | 2412 | rcu_read_lock(); |
2396 | list_for_each_entry_rcu(ptype, head, list) { | 2413 | list_for_each_entry_rcu(ptype, head, list) { |
2397 | struct sk_buff *p; | ||
2398 | |||
2399 | if (ptype->type != type || ptype->dev || !ptype->gro_receive) | 2414 | if (ptype->type != type || ptype->dev || !ptype->gro_receive) |
2400 | continue; | 2415 | continue; |
2401 | 2416 | ||
2402 | skb_reset_network_header(skb); | 2417 | skb_set_network_header(skb, skb_gro_offset(skb)); |
2403 | mac_len = skb->network_header - skb->mac_header; | 2418 | mac_len = skb->network_header - skb->mac_header; |
2404 | skb->mac_len = mac_len; | 2419 | skb->mac_len = mac_len; |
2405 | NAPI_GRO_CB(skb)->same_flow = 0; | 2420 | NAPI_GRO_CB(skb)->same_flow = 0; |
2406 | NAPI_GRO_CB(skb)->flush = 0; | 2421 | NAPI_GRO_CB(skb)->flush = 0; |
2407 | NAPI_GRO_CB(skb)->free = 0; | 2422 | NAPI_GRO_CB(skb)->free = 0; |
2408 | 2423 | ||
2409 | for (p = napi->gro_list; p; p = p->next) { | ||
2410 | count++; | ||
2411 | |||
2412 | if (!NAPI_GRO_CB(p)->same_flow) | ||
2413 | continue; | ||
2414 | |||
2415 | if (p->mac_len != mac_len || | ||
2416 | memcmp(skb_mac_header(p), skb_mac_header(skb), | ||
2417 | mac_len)) | ||
2418 | NAPI_GRO_CB(p)->same_flow = 0; | ||
2419 | } | ||
2420 | |||
2421 | pp = ptype->gro_receive(&napi->gro_list, skb); | 2424 | pp = ptype->gro_receive(&napi->gro_list, skb); |
2422 | break; | 2425 | break; |
2423 | } | 2426 | } |
@@ -2427,7 +2430,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
2427 | goto normal; | 2430 | goto normal; |
2428 | 2431 | ||
2429 | same_flow = NAPI_GRO_CB(skb)->same_flow; | 2432 | same_flow = NAPI_GRO_CB(skb)->same_flow; |
2430 | free = NAPI_GRO_CB(skb)->free; | 2433 | ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; |
2431 | 2434 | ||
2432 | if (pp) { | 2435 | if (pp) { |
2433 | struct sk_buff *nskb = *pp; | 2436 | struct sk_buff *nskb = *pp; |
@@ -2435,27 +2438,35 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
2435 | *pp = nskb->next; | 2438 | *pp = nskb->next; |
2436 | nskb->next = NULL; | 2439 | nskb->next = NULL; |
2437 | napi_gro_complete(nskb); | 2440 | napi_gro_complete(nskb); |
2438 | count--; | 2441 | napi->gro_count--; |
2439 | } | 2442 | } |
2440 | 2443 | ||
2441 | if (same_flow) | 2444 | if (same_flow) |
2442 | goto ok; | 2445 | goto ok; |
2443 | 2446 | ||
2444 | if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { | 2447 | if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) |
2445 | __skb_push(skb, -skb_network_offset(skb)); | ||
2446 | goto normal; | 2448 | goto normal; |
2447 | } | ||
2448 | 2449 | ||
2450 | napi->gro_count++; | ||
2449 | NAPI_GRO_CB(skb)->count = 1; | 2451 | NAPI_GRO_CB(skb)->count = 1; |
2450 | skb_shinfo(skb)->gso_size = skb->len; | 2452 | skb_shinfo(skb)->gso_size = skb_gro_len(skb); |
2451 | skb->next = napi->gro_list; | 2453 | skb->next = napi->gro_list; |
2452 | napi->gro_list = skb; | 2454 | napi->gro_list = skb; |
2455 | ret = GRO_HELD; | ||
2456 | |||
2457 | pull: | ||
2458 | if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) { | ||
2459 | if (napi->gro_list == skb) | ||
2460 | napi->gro_list = skb->next; | ||
2461 | ret = GRO_DROP; | ||
2462 | } | ||
2453 | 2463 | ||
2454 | ok: | 2464 | ok: |
2455 | return free; | 2465 | return ret; |
2456 | 2466 | ||
2457 | normal: | 2467 | normal: |
2458 | return -1; | 2468 | ret = GRO_NORMAL; |
2469 | goto pull; | ||
2459 | } | 2470 | } |
2460 | EXPORT_SYMBOL(dev_gro_receive); | 2471 | EXPORT_SYMBOL(dev_gro_receive); |
2461 | 2472 | ||
@@ -2463,37 +2474,50 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
2463 | { | 2474 | { |
2464 | struct sk_buff *p; | 2475 | struct sk_buff *p; |
2465 | 2476 | ||
2477 | if (netpoll_rx_on(skb)) | ||
2478 | return GRO_NORMAL; | ||
2479 | |||
2466 | for (p = napi->gro_list; p; p = p->next) { | 2480 | for (p = napi->gro_list; p; p = p->next) { |
2467 | NAPI_GRO_CB(p)->same_flow = 1; | 2481 | NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) |
2482 | && !compare_ether_header(skb_mac_header(p), | ||
2483 | skb_gro_mac_header(skb)); | ||
2468 | NAPI_GRO_CB(p)->flush = 0; | 2484 | NAPI_GRO_CB(p)->flush = 0; |
2469 | } | 2485 | } |
2470 | 2486 | ||
2471 | return dev_gro_receive(napi, skb); | 2487 | return dev_gro_receive(napi, skb); |
2472 | } | 2488 | } |
2473 | 2489 | ||
2474 | int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 2490 | int napi_skb_finish(int ret, struct sk_buff *skb) |
2475 | { | 2491 | { |
2476 | switch (__napi_gro_receive(napi, skb)) { | 2492 | int err = NET_RX_SUCCESS; |
2477 | case -1: | 2493 | |
2494 | switch (ret) { | ||
2495 | case GRO_NORMAL: | ||
2478 | return netif_receive_skb(skb); | 2496 | return netif_receive_skb(skb); |
2479 | 2497 | ||
2480 | case 1: | 2498 | case GRO_DROP: |
2499 | err = NET_RX_DROP; | ||
2500 | /* fall through */ | ||
2501 | |||
2502 | case GRO_MERGED_FREE: | ||
2481 | kfree_skb(skb); | 2503 | kfree_skb(skb); |
2482 | break; | 2504 | break; |
2483 | } | 2505 | } |
2484 | 2506 | ||
2485 | return NET_RX_SUCCESS; | 2507 | return err; |
2486 | } | 2508 | } |
2487 | EXPORT_SYMBOL(napi_gro_receive); | 2509 | EXPORT_SYMBOL(napi_skb_finish); |
2488 | 2510 | ||
2489 | void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) | 2511 | int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
2490 | { | 2512 | { |
2491 | skb_shinfo(skb)->nr_frags = 0; | 2513 | skb_gro_reset_offset(skb); |
2492 | 2514 | ||
2493 | skb->len -= skb->data_len; | 2515 | return napi_skb_finish(__napi_gro_receive(napi, skb), skb); |
2494 | skb->truesize -= skb->data_len; | 2516 | } |
2495 | skb->data_len = 0; | 2517 | EXPORT_SYMBOL(napi_gro_receive); |
2496 | 2518 | ||
2519 | void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) | ||
2520 | { | ||
2497 | __skb_pull(skb, skb_headlen(skb)); | 2521 | __skb_pull(skb, skb_headlen(skb)); |
2498 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); | 2522 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); |
2499 | 2523 | ||
@@ -2506,6 +2530,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, | |||
2506 | { | 2530 | { |
2507 | struct net_device *dev = napi->dev; | 2531 | struct net_device *dev = napi->dev; |
2508 | struct sk_buff *skb = napi->skb; | 2532 | struct sk_buff *skb = napi->skb; |
2533 | struct ethhdr *eth; | ||
2534 | skb_frag_t *frag; | ||
2535 | int i; | ||
2509 | 2536 | ||
2510 | napi->skb = NULL; | 2537 | napi->skb = NULL; |
2511 | 2538 | ||
@@ -2518,19 +2545,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, | |||
2518 | } | 2545 | } |
2519 | 2546 | ||
2520 | BUG_ON(info->nr_frags > MAX_SKB_FRAGS); | 2547 | BUG_ON(info->nr_frags > MAX_SKB_FRAGS); |
2548 | frag = info->frags; | ||
2549 | |||
2550 | for (i = 0; i < info->nr_frags; i++) { | ||
2551 | skb_fill_page_desc(skb, i, frag->page, frag->page_offset, | ||
2552 | frag->size); | ||
2553 | frag++; | ||
2554 | } | ||
2521 | skb_shinfo(skb)->nr_frags = info->nr_frags; | 2555 | skb_shinfo(skb)->nr_frags = info->nr_frags; |
2522 | memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); | ||
2523 | 2556 | ||
2524 | skb->data_len = info->len; | 2557 | skb->data_len = info->len; |
2525 | skb->len += info->len; | 2558 | skb->len += info->len; |
2526 | skb->truesize += info->len; | 2559 | skb->truesize += info->len; |
2527 | 2560 | ||
2528 | if (!pskb_may_pull(skb, ETH_HLEN)) { | 2561 | skb_reset_mac_header(skb); |
2562 | skb_gro_reset_offset(skb); | ||
2563 | |||
2564 | eth = skb_gro_header(skb, sizeof(*eth)); | ||
2565 | if (!eth) { | ||
2529 | napi_reuse_skb(napi, skb); | 2566 | napi_reuse_skb(napi, skb); |
2567 | skb = NULL; | ||
2530 | goto out; | 2568 | goto out; |
2531 | } | 2569 | } |
2532 | 2570 | ||
2533 | skb->protocol = eth_type_trans(skb, dev); | 2571 | skb_gro_pull(skb, sizeof(*eth)); |
2572 | |||
2573 | /* | ||
2574 | * This works because the only protocols we care about don't require | ||
2575 | * special handling. We'll fix it up properly at the end. | ||
2576 | */ | ||
2577 | skb->protocol = eth->h_proto; | ||
2534 | 2578 | ||
2535 | skb->ip_summed = info->ip_summed; | 2579 | skb->ip_summed = info->ip_summed; |
2536 | skb->csum = info->csum; | 2580 | skb->csum = info->csum; |
@@ -2540,29 +2584,43 @@ out: | |||
2540 | } | 2584 | } |
2541 | EXPORT_SYMBOL(napi_fraginfo_skb); | 2585 | EXPORT_SYMBOL(napi_fraginfo_skb); |
2542 | 2586 | ||
2543 | int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) | 2587 | int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) |
2544 | { | 2588 | { |
2545 | struct sk_buff *skb = napi_fraginfo_skb(napi, info); | 2589 | int err = NET_RX_SUCCESS; |
2546 | int err = NET_RX_DROP; | ||
2547 | 2590 | ||
2548 | if (!skb) | 2591 | switch (ret) { |
2549 | goto out; | 2592 | case GRO_NORMAL: |
2593 | case GRO_HELD: | ||
2594 | skb->protocol = eth_type_trans(skb, napi->dev); | ||
2550 | 2595 | ||
2551 | err = NET_RX_SUCCESS; | 2596 | if (ret == GRO_NORMAL) |
2597 | return netif_receive_skb(skb); | ||
2552 | 2598 | ||
2553 | switch (__napi_gro_receive(napi, skb)) { | 2599 | skb_gro_pull(skb, -ETH_HLEN); |
2554 | case -1: | 2600 | break; |
2555 | return netif_receive_skb(skb); | ||
2556 | 2601 | ||
2557 | case 0: | 2602 | case GRO_DROP: |
2558 | goto out; | 2603 | err = NET_RX_DROP; |
2559 | } | 2604 | /* fall through */ |
2560 | 2605 | ||
2561 | napi_reuse_skb(napi, skb); | 2606 | case GRO_MERGED_FREE: |
2607 | napi_reuse_skb(napi, skb); | ||
2608 | break; | ||
2609 | } | ||
2562 | 2610 | ||
2563 | out: | ||
2564 | return err; | 2611 | return err; |
2565 | } | 2612 | } |
2613 | EXPORT_SYMBOL(napi_frags_finish); | ||
2614 | |||
2615 | int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) | ||
2616 | { | ||
2617 | struct sk_buff *skb = napi_fraginfo_skb(napi, info); | ||
2618 | |||
2619 | if (!skb) | ||
2620 | return NET_RX_DROP; | ||
2621 | |||
2622 | return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); | ||
2623 | } | ||
2566 | EXPORT_SYMBOL(napi_gro_frags); | 2624 | EXPORT_SYMBOL(napi_gro_frags); |
2567 | 2625 | ||
2568 | static int process_backlog(struct napi_struct *napi, int quota) | 2626 | static int process_backlog(struct napi_struct *napi, int quota) |
@@ -2584,11 +2642,9 @@ static int process_backlog(struct napi_struct *napi, int quota) | |||
2584 | } | 2642 | } |
2585 | local_irq_enable(); | 2643 | local_irq_enable(); |
2586 | 2644 | ||
2587 | napi_gro_receive(napi, skb); | 2645 | netif_receive_skb(skb); |
2588 | } while (++work < quota && jiffies == start_time); | 2646 | } while (++work < quota && jiffies == start_time); |
2589 | 2647 | ||
2590 | napi_gro_flush(napi); | ||
2591 | |||
2592 | return work; | 2648 | return work; |
2593 | } | 2649 | } |
2594 | 2650 | ||
@@ -2642,6 +2698,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, | |||
2642 | int (*poll)(struct napi_struct *, int), int weight) | 2698 | int (*poll)(struct napi_struct *, int), int weight) |
2643 | { | 2699 | { |
2644 | INIT_LIST_HEAD(&napi->poll_list); | 2700 | INIT_LIST_HEAD(&napi->poll_list); |
2701 | napi->gro_count = 0; | ||
2645 | napi->gro_list = NULL; | 2702 | napi->gro_list = NULL; |
2646 | napi->skb = NULL; | 2703 | napi->skb = NULL; |
2647 | napi->poll = poll; | 2704 | napi->poll = poll; |
@@ -2661,7 +2718,7 @@ void netif_napi_del(struct napi_struct *napi) | |||
2661 | struct sk_buff *skb, *next; | 2718 | struct sk_buff *skb, *next; |
2662 | 2719 | ||
2663 | list_del_init(&napi->dev_list); | 2720 | list_del_init(&napi->dev_list); |
2664 | kfree(napi->skb); | 2721 | kfree_skb(napi->skb); |
2665 | 2722 | ||
2666 | for (skb = napi->gro_list; skb; skb = next) { | 2723 | for (skb = napi->gro_list; skb; skb = next) { |
2667 | next = skb->next; | 2724 | next = skb->next; |
@@ -2670,6 +2727,7 @@ void netif_napi_del(struct napi_struct *napi) | |||
2670 | } | 2727 | } |
2671 | 2728 | ||
2672 | napi->gro_list = NULL; | 2729 | napi->gro_list = NULL; |
2730 | napi->gro_count = 0; | ||
2673 | } | 2731 | } |
2674 | EXPORT_SYMBOL(netif_napi_del); | 2732 | EXPORT_SYMBOL(netif_napi_del); |
2675 | 2733 | ||
@@ -3938,6 +3996,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
3938 | cmd == SIOCSMIIREG || | 3996 | cmd == SIOCSMIIREG || |
3939 | cmd == SIOCBRADDIF || | 3997 | cmd == SIOCBRADDIF || |
3940 | cmd == SIOCBRDELIF || | 3998 | cmd == SIOCBRDELIF || |
3999 | cmd == SIOCSHWTSTAMP || | ||
3941 | cmd == SIOCWANDEV) { | 4000 | cmd == SIOCWANDEV) { |
3942 | err = -EOPNOTSUPP; | 4001 | err = -EOPNOTSUPP; |
3943 | if (ops->ndo_do_ioctl) { | 4002 | if (ops->ndo_do_ioctl) { |
@@ -4092,6 +4151,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
4092 | case SIOCBONDCHANGEACTIVE: | 4151 | case SIOCBONDCHANGEACTIVE: |
4093 | case SIOCBRADDIF: | 4152 | case SIOCBRADDIF: |
4094 | case SIOCBRDELIF: | 4153 | case SIOCBRDELIF: |
4154 | case SIOCSHWTSTAMP: | ||
4095 | if (!capable(CAP_NET_ADMIN)) | 4155 | if (!capable(CAP_NET_ADMIN)) |
4096 | return -EPERM; | 4156 | return -EPERM; |
4097 | /* fall through */ | 4157 | /* fall through */ |
@@ -4272,6 +4332,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) | |||
4272 | } | 4332 | } |
4273 | EXPORT_SYMBOL(netdev_fix_features); | 4333 | EXPORT_SYMBOL(netdev_fix_features); |
4274 | 4334 | ||
4335 | /* Some devices need to (re-)set their netdev_ops inside | ||
4336 | * ->init() or similar. If that happens, we have to set up | ||
4337 | * the compat pointers again. | ||
4338 | */ | ||
4339 | void netdev_resync_ops(struct net_device *dev) | ||
4340 | { | ||
4341 | #ifdef CONFIG_COMPAT_NET_DEV_OPS | ||
4342 | const struct net_device_ops *ops = dev->netdev_ops; | ||
4343 | |||
4344 | dev->init = ops->ndo_init; | ||
4345 | dev->uninit = ops->ndo_uninit; | ||
4346 | dev->open = ops->ndo_open; | ||
4347 | dev->change_rx_flags = ops->ndo_change_rx_flags; | ||
4348 | dev->set_rx_mode = ops->ndo_set_rx_mode; | ||
4349 | dev->set_multicast_list = ops->ndo_set_multicast_list; | ||
4350 | dev->set_mac_address = ops->ndo_set_mac_address; | ||
4351 | dev->validate_addr = ops->ndo_validate_addr; | ||
4352 | dev->do_ioctl = ops->ndo_do_ioctl; | ||
4353 | dev->set_config = ops->ndo_set_config; | ||
4354 | dev->change_mtu = ops->ndo_change_mtu; | ||
4355 | dev->neigh_setup = ops->ndo_neigh_setup; | ||
4356 | dev->tx_timeout = ops->ndo_tx_timeout; | ||
4357 | dev->get_stats = ops->ndo_get_stats; | ||
4358 | dev->vlan_rx_register = ops->ndo_vlan_rx_register; | ||
4359 | dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; | ||
4360 | dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; | ||
4361 | #ifdef CONFIG_NET_POLL_CONTROLLER | ||
4362 | dev->poll_controller = ops->ndo_poll_controller; | ||
4363 | #endif | ||
4364 | #endif | ||
4365 | } | ||
4366 | EXPORT_SYMBOL(netdev_resync_ops); | ||
4367 | |||
4275 | /** | 4368 | /** |
4276 | * register_netdevice - register a network device | 4369 | * register_netdevice - register a network device |
4277 | * @dev: device to register | 4370 | * @dev: device to register |
@@ -4312,38 +4405,18 @@ int register_netdevice(struct net_device *dev) | |||
4312 | dev->iflink = -1; | 4405 | dev->iflink = -1; |
4313 | 4406 | ||
4314 | #ifdef CONFIG_COMPAT_NET_DEV_OPS | 4407 | #ifdef CONFIG_COMPAT_NET_DEV_OPS |
4315 | /* Netdevice_ops API compatiability support. | 4408 | /* Netdevice_ops API compatibility support. |
4316 | * This is temporary until all network devices are converted. | 4409 | * This is temporary until all network devices are converted. |
4317 | */ | 4410 | */ |
4318 | if (dev->netdev_ops) { | 4411 | if (dev->netdev_ops) { |
4319 | const struct net_device_ops *ops = dev->netdev_ops; | 4412 | netdev_resync_ops(dev); |
4320 | |||
4321 | dev->init = ops->ndo_init; | ||
4322 | dev->uninit = ops->ndo_uninit; | ||
4323 | dev->open = ops->ndo_open; | ||
4324 | dev->change_rx_flags = ops->ndo_change_rx_flags; | ||
4325 | dev->set_rx_mode = ops->ndo_set_rx_mode; | ||
4326 | dev->set_multicast_list = ops->ndo_set_multicast_list; | ||
4327 | dev->set_mac_address = ops->ndo_set_mac_address; | ||
4328 | dev->validate_addr = ops->ndo_validate_addr; | ||
4329 | dev->do_ioctl = ops->ndo_do_ioctl; | ||
4330 | dev->set_config = ops->ndo_set_config; | ||
4331 | dev->change_mtu = ops->ndo_change_mtu; | ||
4332 | dev->tx_timeout = ops->ndo_tx_timeout; | ||
4333 | dev->get_stats = ops->ndo_get_stats; | ||
4334 | dev->vlan_rx_register = ops->ndo_vlan_rx_register; | ||
4335 | dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; | ||
4336 | dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; | ||
4337 | #ifdef CONFIG_NET_POLL_CONTROLLER | ||
4338 | dev->poll_controller = ops->ndo_poll_controller; | ||
4339 | #endif | ||
4340 | } else { | 4413 | } else { |
4341 | char drivername[64]; | 4414 | char drivername[64]; |
4342 | pr_info("%s (%s): not using net_device_ops yet\n", | 4415 | pr_info("%s (%s): not using net_device_ops yet\n", |
4343 | dev->name, netdev_drivername(dev, drivername, 64)); | 4416 | dev->name, netdev_drivername(dev, drivername, 64)); |
4344 | 4417 | ||
4345 | /* This works only because net_device_ops and the | 4418 | /* This works only because net_device_ops and the |
4346 | compatiablity structure are the same. */ | 4419 | compatibility structure are the same. */ |
4347 | dev->netdev_ops = (void *) &(dev->init); | 4420 | dev->netdev_ops = (void *) &(dev->init); |
4348 | } | 4421 | } |
4349 | #endif | 4422 | #endif |
@@ -4434,6 +4507,45 @@ err_uninit: | |||
4434 | } | 4507 | } |
4435 | 4508 | ||
4436 | /** | 4509 | /** |
4510 | * init_dummy_netdev - init a dummy network device for NAPI | ||
4511 | * @dev: device to init | ||
4512 | * | ||
4513 | * This takes a network device structure and initializes the minimum | ||
4514 | * amount of fields so it can be used to schedule NAPI polls without | ||
4515 | * registering a full blown interface. This is to be used by drivers | ||
4516 | * that need to tie several hardware interfaces to a single NAPI | ||
4517 | * poll scheduler due to HW limitations. | ||
4518 | */ | ||
4519 | int init_dummy_netdev(struct net_device *dev) | ||
4520 | { | ||
4521 | /* Clear everything. Note we don't initialize spinlocks | ||
4522 | * as they aren't supposed to be taken by any of the | ||
4523 | * NAPI code and this dummy netdev is supposed to be | ||
4524 | * only ever used for NAPI polls | ||
4525 | */ | ||
4526 | memset(dev, 0, sizeof(struct net_device)); | ||
4527 | |||
4528 | /* make sure we BUG if trying to hit standard | ||
4529 | * register/unregister code path | ||
4530 | */ | ||
4531 | dev->reg_state = NETREG_DUMMY; | ||
4532 | |||
4533 | /* initialize the ref count */ | ||
4534 | atomic_set(&dev->refcnt, 1); | ||
4535 | |||
4536 | /* NAPI wants this */ | ||
4537 | INIT_LIST_HEAD(&dev->napi_list); | ||
4538 | |||
4539 | /* a dummy interface is started by default */ | ||
4540 | set_bit(__LINK_STATE_PRESENT, &dev->state); | ||
4541 | set_bit(__LINK_STATE_START, &dev->state); | ||
4542 | |||
4543 | return 0; | ||
4544 | } | ||
4545 | EXPORT_SYMBOL_GPL(init_dummy_netdev); | ||
4546 | |||
4547 | |||
4548 | /** | ||
4437 | * register_netdev - register a network device | 4549 | * register_netdev - register a network device |
4438 | * @dev: device to register | 4550 | * @dev: device to register |
4439 | * | 4551 | * |
@@ -5136,6 +5248,7 @@ static int __init net_dev_init(void) | |||
5136 | queue->backlog.poll = process_backlog; | 5248 | queue->backlog.poll = process_backlog; |
5137 | queue->backlog.weight = weight_p; | 5249 | queue->backlog.weight = weight_p; |
5138 | queue->backlog.gro_list = NULL; | 5250 | queue->backlog.gro_list = NULL; |
5251 | queue->backlog.gro_count = 0; | ||
5139 | } | 5252 | } |
5140 | 5253 | ||
5141 | dev_boot_phase = 0; | 5254 | dev_boot_phase = 0; |
@@ -5168,6 +5281,14 @@ out: | |||
5168 | 5281 | ||
5169 | subsys_initcall(net_dev_init); | 5282 | subsys_initcall(net_dev_init); |
5170 | 5283 | ||
5284 | static int __init initialize_hashrnd(void) | ||
5285 | { | ||
5286 | get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); | ||
5287 | return 0; | ||
5288 | } | ||
5289 | |||
5290 | late_initcall_sync(initialize_hashrnd); | ||
5291 | |||
5171 | EXPORT_SYMBOL(__dev_get_by_index); | 5292 | EXPORT_SYMBOL(__dev_get_by_index); |
5172 | EXPORT_SYMBOL(__dev_get_by_name); | 5293 | EXPORT_SYMBOL(__dev_get_by_name); |
5173 | EXPORT_SYMBOL(__dev_remove_pack); | 5294 | EXPORT_SYMBOL(__dev_remove_pack); |