diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2011-05-14 06:06:36 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2011-05-14 06:06:36 -0400 |
commit | a18f22a968de17b29f2310cdb7ba69163e65ec15 (patch) | |
tree | a7d56d88fad5e444d7661484109758a2f436129e /net/core/dev.c | |
parent | a1c57e0fec53defe745e64417eacdbd3618c3e66 (diff) | |
parent | 798778b8653f64b7b2162ac70eca10367cff6ce8 (diff) |
Merge branch 'consolidate-clksrc-i8253' of master.kernel.org:~rmk/linux-2.6-arm into timers/clocksource
Conflicts:
arch/ia64/kernel/cyclone.c
arch/mips/kernel/i8253.c
arch/x86/kernel/i8253.c
Reason: Resolve conflicts so further cleanups do not conflict further
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 599 |
1 files changed, 376 insertions, 223 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 8ae6631abcc2..856b6ee9a1d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -132,6 +132,7 @@ | |||
132 | #include <trace/events/skb.h> | 132 | #include <trace/events/skb.h> |
133 | #include <linux/pci.h> | 133 | #include <linux/pci.h> |
134 | #include <linux/inetdevice.h> | 134 | #include <linux/inetdevice.h> |
135 | #include <linux/cpu_rmap.h> | ||
135 | 136 | ||
136 | #include "net-sysfs.h" | 137 | #include "net-sysfs.h" |
137 | 138 | ||
@@ -1114,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change); | |||
1114 | void dev_load(struct net *net, const char *name) | 1115 | void dev_load(struct net *net, const char *name) |
1115 | { | 1116 | { |
1116 | struct net_device *dev; | 1117 | struct net_device *dev; |
1118 | int no_module; | ||
1117 | 1119 | ||
1118 | rcu_read_lock(); | 1120 | rcu_read_lock(); |
1119 | dev = dev_get_by_name_rcu(net, name); | 1121 | dev = dev_get_by_name_rcu(net, name); |
1120 | rcu_read_unlock(); | 1122 | rcu_read_unlock(); |
1121 | 1123 | ||
1122 | if (!dev && capable(CAP_NET_ADMIN)) | 1124 | no_module = !dev; |
1123 | request_module("%s", name); | 1125 | if (no_module && capable(CAP_NET_ADMIN)) |
1126 | no_module = request_module("netdev-%s", name); | ||
1127 | if (no_module && capable(CAP_SYS_MODULE)) { | ||
1128 | if (!request_module("%s", name)) | ||
1129 | pr_err("Loading kernel module for a network device " | ||
1130 | "with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s " | ||
1131 | "instead\n", name); | ||
1132 | } | ||
1124 | } | 1133 | } |
1125 | EXPORT_SYMBOL(dev_load); | 1134 | EXPORT_SYMBOL(dev_load); |
1126 | 1135 | ||
@@ -1131,9 +1140,6 @@ static int __dev_open(struct net_device *dev) | |||
1131 | 1140 | ||
1132 | ASSERT_RTNL(); | 1141 | ASSERT_RTNL(); |
1133 | 1142 | ||
1134 | /* | ||
1135 | * Is it even present? | ||
1136 | */ | ||
1137 | if (!netif_device_present(dev)) | 1143 | if (!netif_device_present(dev)) |
1138 | return -ENODEV; | 1144 | return -ENODEV; |
1139 | 1145 | ||
@@ -1142,9 +1148,6 @@ static int __dev_open(struct net_device *dev) | |||
1142 | if (ret) | 1148 | if (ret) |
1143 | return ret; | 1149 | return ret; |
1144 | 1150 | ||
1145 | /* | ||
1146 | * Call device private open method | ||
1147 | */ | ||
1148 | set_bit(__LINK_STATE_START, &dev->state); | 1151 | set_bit(__LINK_STATE_START, &dev->state); |
1149 | 1152 | ||
1150 | if (ops->ndo_validate_addr) | 1153 | if (ops->ndo_validate_addr) |
@@ -1153,31 +1156,12 @@ static int __dev_open(struct net_device *dev) | |||
1153 | if (!ret && ops->ndo_open) | 1156 | if (!ret && ops->ndo_open) |
1154 | ret = ops->ndo_open(dev); | 1157 | ret = ops->ndo_open(dev); |
1155 | 1158 | ||
1156 | /* | ||
1157 | * If it went open OK then: | ||
1158 | */ | ||
1159 | |||
1160 | if (ret) | 1159 | if (ret) |
1161 | clear_bit(__LINK_STATE_START, &dev->state); | 1160 | clear_bit(__LINK_STATE_START, &dev->state); |
1162 | else { | 1161 | else { |
1163 | /* | ||
1164 | * Set the flags. | ||
1165 | */ | ||
1166 | dev->flags |= IFF_UP; | 1162 | dev->flags |= IFF_UP; |
1167 | |||
1168 | /* | ||
1169 | * Enable NET_DMA | ||
1170 | */ | ||
1171 | net_dmaengine_get(); | 1163 | net_dmaengine_get(); |
1172 | |||
1173 | /* | ||
1174 | * Initialize multicasting status | ||
1175 | */ | ||
1176 | dev_set_rx_mode(dev); | 1164 | dev_set_rx_mode(dev); |
1177 | |||
1178 | /* | ||
1179 | * Wakeup transmit queue engine | ||
1180 | */ | ||
1181 | dev_activate(dev); | 1165 | dev_activate(dev); |
1182 | } | 1166 | } |
1183 | 1167 | ||
@@ -1200,22 +1184,13 @@ int dev_open(struct net_device *dev) | |||
1200 | { | 1184 | { |
1201 | int ret; | 1185 | int ret; |
1202 | 1186 | ||
1203 | /* | ||
1204 | * Is it already up? | ||
1205 | */ | ||
1206 | if (dev->flags & IFF_UP) | 1187 | if (dev->flags & IFF_UP) |
1207 | return 0; | 1188 | return 0; |
1208 | 1189 | ||
1209 | /* | ||
1210 | * Open device | ||
1211 | */ | ||
1212 | ret = __dev_open(dev); | 1190 | ret = __dev_open(dev); |
1213 | if (ret < 0) | 1191 | if (ret < 0) |
1214 | return ret; | 1192 | return ret; |
1215 | 1193 | ||
1216 | /* | ||
1217 | * ... and announce new interface. | ||
1218 | */ | ||
1219 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | 1194 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); |
1220 | call_netdevice_notifiers(NETDEV_UP, dev); | 1195 | call_netdevice_notifiers(NETDEV_UP, dev); |
1221 | 1196 | ||
@@ -1231,10 +1206,6 @@ static int __dev_close_many(struct list_head *head) | |||
1231 | might_sleep(); | 1206 | might_sleep(); |
1232 | 1207 | ||
1233 | list_for_each_entry(dev, head, unreg_list) { | 1208 | list_for_each_entry(dev, head, unreg_list) { |
1234 | /* | ||
1235 | * Tell people we are going down, so that they can | ||
1236 | * prepare to death, when device is still operating. | ||
1237 | */ | ||
1238 | call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); | 1209 | call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); |
1239 | 1210 | ||
1240 | clear_bit(__LINK_STATE_START, &dev->state); | 1211 | clear_bit(__LINK_STATE_START, &dev->state); |
@@ -1263,15 +1234,7 @@ static int __dev_close_many(struct list_head *head) | |||
1263 | if (ops->ndo_stop) | 1234 | if (ops->ndo_stop) |
1264 | ops->ndo_stop(dev); | 1235 | ops->ndo_stop(dev); |
1265 | 1236 | ||
1266 | /* | ||
1267 | * Device is now down. | ||
1268 | */ | ||
1269 | |||
1270 | dev->flags &= ~IFF_UP; | 1237 | dev->flags &= ~IFF_UP; |
1271 | |||
1272 | /* | ||
1273 | * Shutdown NET_DMA | ||
1274 | */ | ||
1275 | net_dmaengine_put(); | 1238 | net_dmaengine_put(); |
1276 | } | 1239 | } |
1277 | 1240 | ||
@@ -1289,7 +1252,7 @@ static int __dev_close(struct net_device *dev) | |||
1289 | return retval; | 1252 | return retval; |
1290 | } | 1253 | } |
1291 | 1254 | ||
1292 | int dev_close_many(struct list_head *head) | 1255 | static int dev_close_many(struct list_head *head) |
1293 | { | 1256 | { |
1294 | struct net_device *dev, *tmp; | 1257 | struct net_device *dev, *tmp; |
1295 | LIST_HEAD(tmp_list); | 1258 | LIST_HEAD(tmp_list); |
@@ -1300,9 +1263,6 @@ int dev_close_many(struct list_head *head) | |||
1300 | 1263 | ||
1301 | __dev_close_many(head); | 1264 | __dev_close_many(head); |
1302 | 1265 | ||
1303 | /* | ||
1304 | * Tell people we are down | ||
1305 | */ | ||
1306 | list_for_each_entry(dev, head, unreg_list) { | 1266 | list_for_each_entry(dev, head, unreg_list) { |
1307 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | 1267 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); |
1308 | call_netdevice_notifiers(NETDEV_DOWN, dev); | 1268 | call_netdevice_notifiers(NETDEV_DOWN, dev); |
@@ -1344,14 +1304,17 @@ EXPORT_SYMBOL(dev_close); | |||
1344 | */ | 1304 | */ |
1345 | void dev_disable_lro(struct net_device *dev) | 1305 | void dev_disable_lro(struct net_device *dev) |
1346 | { | 1306 | { |
1347 | if (dev->ethtool_ops && dev->ethtool_ops->get_flags && | 1307 | u32 flags; |
1348 | dev->ethtool_ops->set_flags) { | 1308 | |
1349 | u32 flags = dev->ethtool_ops->get_flags(dev); | 1309 | if (dev->ethtool_ops && dev->ethtool_ops->get_flags) |
1350 | if (flags & ETH_FLAG_LRO) { | 1310 | flags = dev->ethtool_ops->get_flags(dev); |
1351 | flags &= ~ETH_FLAG_LRO; | 1311 | else |
1352 | dev->ethtool_ops->set_flags(dev, flags); | 1312 | flags = ethtool_op_get_flags(dev); |
1353 | } | 1313 | |
1354 | } | 1314 | if (!(flags & ETH_FLAG_LRO)) |
1315 | return; | ||
1316 | |||
1317 | __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); | ||
1355 | WARN_ON(dev->features & NETIF_F_LRO); | 1318 | WARN_ON(dev->features & NETIF_F_LRO); |
1356 | } | 1319 | } |
1357 | EXPORT_SYMBOL(dev_disable_lro); | 1320 | EXPORT_SYMBOL(dev_disable_lro); |
@@ -1359,11 +1322,6 @@ EXPORT_SYMBOL(dev_disable_lro); | |||
1359 | 1322 | ||
1360 | static int dev_boot_phase = 1; | 1323 | static int dev_boot_phase = 1; |
1361 | 1324 | ||
1362 | /* | ||
1363 | * Device change register/unregister. These are not inline or static | ||
1364 | * as we export them to the world. | ||
1365 | */ | ||
1366 | |||
1367 | /** | 1325 | /** |
1368 | * register_netdevice_notifier - register a network notifier block | 1326 | * register_netdevice_notifier - register a network notifier block |
1369 | * @nb: notifier | 1327 | * @nb: notifier |
@@ -1465,6 +1423,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) | |||
1465 | ASSERT_RTNL(); | 1423 | ASSERT_RTNL(); |
1466 | return raw_notifier_call_chain(&netdev_chain, val, dev); | 1424 | return raw_notifier_call_chain(&netdev_chain, val, dev); |
1467 | } | 1425 | } |
1426 | EXPORT_SYMBOL(call_netdevice_notifiers); | ||
1468 | 1427 | ||
1469 | /* When > 0 there are consumers of rx skb time stamps */ | 1428 | /* When > 0 there are consumers of rx skb time stamps */ |
1470 | static atomic_t netstamp_needed = ATOMIC_INIT(0); | 1429 | static atomic_t netstamp_needed = ATOMIC_INIT(0); |
@@ -1495,6 +1454,27 @@ static inline void net_timestamp_check(struct sk_buff *skb) | |||
1495 | __net_timestamp(skb); | 1454 | __net_timestamp(skb); |
1496 | } | 1455 | } |
1497 | 1456 | ||
1457 | static inline bool is_skb_forwardable(struct net_device *dev, | ||
1458 | struct sk_buff *skb) | ||
1459 | { | ||
1460 | unsigned int len; | ||
1461 | |||
1462 | if (!(dev->flags & IFF_UP)) | ||
1463 | return false; | ||
1464 | |||
1465 | len = dev->mtu + dev->hard_header_len + VLAN_HLEN; | ||
1466 | if (skb->len <= len) | ||
1467 | return true; | ||
1468 | |||
1469 | /* if TSO is enabled, we don't care about the length as the packet | ||
1470 | * could be forwarded without being segmented before | ||
1471 | */ | ||
1472 | if (skb_is_gso(skb)) | ||
1473 | return true; | ||
1474 | |||
1475 | return false; | ||
1476 | } | ||
1477 | |||
1498 | /** | 1478 | /** |
1499 | * dev_forward_skb - loopback an skb to another netif | 1479 | * dev_forward_skb - loopback an skb to another netif |
1500 | * | 1480 | * |
@@ -1518,8 +1498,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | |||
1518 | skb_orphan(skb); | 1498 | skb_orphan(skb); |
1519 | nf_reset(skb); | 1499 | nf_reset(skb); |
1520 | 1500 | ||
1521 | if (unlikely(!(dev->flags & IFF_UP) || | 1501 | if (unlikely(!is_skb_forwardable(dev, skb))) { |
1522 | (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) { | ||
1523 | atomic_long_inc(&dev->rx_dropped); | 1502 | atomic_long_inc(&dev->rx_dropped); |
1524 | kfree_skb(skb); | 1503 | kfree_skb(skb); |
1525 | return NET_RX_DROP; | 1504 | return NET_RX_DROP; |
@@ -1597,6 +1576,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1597 | rcu_read_unlock(); | 1576 | rcu_read_unlock(); |
1598 | } | 1577 | } |
1599 | 1578 | ||
1579 | /* netif_setup_tc - Handle tc mappings on real_num_tx_queues change | ||
1580 | * @dev: Network device | ||
1581 | * @txq: number of queues available | ||
1582 | * | ||
1583 | * If real_num_tx_queues is changed the tc mappings may no longer be | ||
1584 | * valid. To resolve this verify the tc mapping remains valid and if | ||
1585 | * not NULL the mapping. With no priorities mapping to this | ||
1586 | * offset/count pair it will no longer be used. In the worst case TC0 | ||
1587 | * is invalid nothing can be done so disable priority mappings. If is | ||
1588 | * expected that drivers will fix this mapping if they can before | ||
1589 | * calling netif_set_real_num_tx_queues. | ||
1590 | */ | ||
1591 | static void netif_setup_tc(struct net_device *dev, unsigned int txq) | ||
1592 | { | ||
1593 | int i; | ||
1594 | struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; | ||
1595 | |||
1596 | /* If TC0 is invalidated disable TC mapping */ | ||
1597 | if (tc->offset + tc->count > txq) { | ||
1598 | pr_warning("Number of in use tx queues changed " | ||
1599 | "invalidating tc mappings. Priority " | ||
1600 | "traffic classification disabled!\n"); | ||
1601 | dev->num_tc = 0; | ||
1602 | return; | ||
1603 | } | ||
1604 | |||
1605 | /* Invalidated prio to tc mappings set to TC0 */ | ||
1606 | for (i = 1; i < TC_BITMASK + 1; i++) { | ||
1607 | int q = netdev_get_prio_tc_map(dev, i); | ||
1608 | |||
1609 | tc = &dev->tc_to_txq[q]; | ||
1610 | if (tc->offset + tc->count > txq) { | ||
1611 | pr_warning("Number of in use tx queues " | ||
1612 | "changed. Priority %i to tc " | ||
1613 | "mapping %i is no longer valid " | ||
1614 | "setting map to 0\n", | ||
1615 | i, q); | ||
1616 | netdev_set_prio_tc_map(dev, i, 0); | ||
1617 | } | ||
1618 | } | ||
1619 | } | ||
1620 | |||
1600 | /* | 1621 | /* |
1601 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues | 1622 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues |
1602 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. | 1623 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. |
@@ -1608,7 +1629,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) | |||
1608 | if (txq < 1 || txq > dev->num_tx_queues) | 1629 | if (txq < 1 || txq > dev->num_tx_queues) |
1609 | return -EINVAL; | 1630 | return -EINVAL; |
1610 | 1631 | ||
1611 | if (dev->reg_state == NETREG_REGISTERED) { | 1632 | if (dev->reg_state == NETREG_REGISTERED || |
1633 | dev->reg_state == NETREG_UNREGISTERING) { | ||
1612 | ASSERT_RTNL(); | 1634 | ASSERT_RTNL(); |
1613 | 1635 | ||
1614 | rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, | 1636 | rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, |
@@ -1616,6 +1638,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) | |||
1616 | if (rc) | 1638 | if (rc) |
1617 | return rc; | 1639 | return rc; |
1618 | 1640 | ||
1641 | if (dev->num_tc) | ||
1642 | netif_setup_tc(dev, txq); | ||
1643 | |||
1619 | if (txq < dev->real_num_tx_queues) | 1644 | if (txq < dev->real_num_tx_queues) |
1620 | qdisc_reset_all_tx_gt(dev, txq); | 1645 | qdisc_reset_all_tx_gt(dev, txq); |
1621 | } | 1646 | } |
@@ -1815,7 +1840,7 @@ EXPORT_SYMBOL(skb_checksum_help); | |||
1815 | * It may return NULL if the skb requires no segmentation. This is | 1840 | * It may return NULL if the skb requires no segmentation. This is |
1816 | * only possible when GSO is used for verifying header integrity. | 1841 | * only possible when GSO is used for verifying header integrity. |
1817 | */ | 1842 | */ |
1818 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | 1843 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) |
1819 | { | 1844 | { |
1820 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); | 1845 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); |
1821 | struct packet_type *ptype; | 1846 | struct packet_type *ptype; |
@@ -2003,7 +2028,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) | |||
2003 | protocol == htons(ETH_P_FCOE))); | 2028 | protocol == htons(ETH_P_FCOE))); |
2004 | } | 2029 | } |
2005 | 2030 | ||
2006 | static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) | 2031 | static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) |
2007 | { | 2032 | { |
2008 | if (!can_checksum_protocol(features, protocol)) { | 2033 | if (!can_checksum_protocol(features, protocol)) { |
2009 | features &= ~NETIF_F_ALL_CSUM; | 2034 | features &= ~NETIF_F_ALL_CSUM; |
@@ -2015,10 +2040,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features | |||
2015 | return features; | 2040 | return features; |
2016 | } | 2041 | } |
2017 | 2042 | ||
2018 | int netif_skb_features(struct sk_buff *skb) | 2043 | u32 netif_skb_features(struct sk_buff *skb) |
2019 | { | 2044 | { |
2020 | __be16 protocol = skb->protocol; | 2045 | __be16 protocol = skb->protocol; |
2021 | int features = skb->dev->features; | 2046 | u32 features = skb->dev->features; |
2022 | 2047 | ||
2023 | if (protocol == htons(ETH_P_8021Q)) { | 2048 | if (protocol == htons(ETH_P_8021Q)) { |
2024 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; | 2049 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; |
@@ -2063,10 +2088,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
2063 | int rc = NETDEV_TX_OK; | 2088 | int rc = NETDEV_TX_OK; |
2064 | 2089 | ||
2065 | if (likely(!skb->next)) { | 2090 | if (likely(!skb->next)) { |
2066 | int features; | 2091 | u32 features; |
2067 | 2092 | ||
2068 | /* | 2093 | /* |
2069 | * If device doesnt need skb->dst, release it right now while | 2094 | * If device doesn't need skb->dst, release it right now while |
2070 | * its hot in this cpu cache | 2095 | * its hot in this cpu cache |
2071 | */ | 2096 | */ |
2072 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 2097 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
@@ -2126,7 +2151,7 @@ gso: | |||
2126 | nskb->next = NULL; | 2151 | nskb->next = NULL; |
2127 | 2152 | ||
2128 | /* | 2153 | /* |
2129 | * If device doesnt need nskb->dst, release it right now while | 2154 | * If device doesn't need nskb->dst, release it right now while |
2130 | * its hot in this cpu cache | 2155 | * its hot in this cpu cache |
2131 | */ | 2156 | */ |
2132 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 2157 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
@@ -2165,6 +2190,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, | |||
2165 | unsigned int num_tx_queues) | 2190 | unsigned int num_tx_queues) |
2166 | { | 2191 | { |
2167 | u32 hash; | 2192 | u32 hash; |
2193 | u16 qoffset = 0; | ||
2194 | u16 qcount = num_tx_queues; | ||
2168 | 2195 | ||
2169 | if (skb_rx_queue_recorded(skb)) { | 2196 | if (skb_rx_queue_recorded(skb)) { |
2170 | hash = skb_get_rx_queue(skb); | 2197 | hash = skb_get_rx_queue(skb); |
@@ -2173,13 +2200,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, | |||
2173 | return hash; | 2200 | return hash; |
2174 | } | 2201 | } |
2175 | 2202 | ||
2203 | if (dev->num_tc) { | ||
2204 | u8 tc = netdev_get_prio_tc_map(dev, skb->priority); | ||
2205 | qoffset = dev->tc_to_txq[tc].offset; | ||
2206 | qcount = dev->tc_to_txq[tc].count; | ||
2207 | } | ||
2208 | |||
2176 | if (skb->sk && skb->sk->sk_hash) | 2209 | if (skb->sk && skb->sk->sk_hash) |
2177 | hash = skb->sk->sk_hash; | 2210 | hash = skb->sk->sk_hash; |
2178 | else | 2211 | else |
2179 | hash = (__force u16) skb->protocol ^ skb->rxhash; | 2212 | hash = (__force u16) skb->protocol ^ skb->rxhash; |
2180 | hash = jhash_1word(hash, hashrnd); | 2213 | hash = jhash_1word(hash, hashrnd); |
2181 | 2214 | ||
2182 | return (u16) (((u64) hash * num_tx_queues) >> 32); | 2215 | return (u16) (((u64) hash * qcount) >> 32) + qoffset; |
2183 | } | 2216 | } |
2184 | EXPORT_SYMBOL(__skb_tx_hash); | 2217 | EXPORT_SYMBOL(__skb_tx_hash); |
2185 | 2218 | ||
@@ -2276,15 +2309,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2276 | struct netdev_queue *txq) | 2309 | struct netdev_queue *txq) |
2277 | { | 2310 | { |
2278 | spinlock_t *root_lock = qdisc_lock(q); | 2311 | spinlock_t *root_lock = qdisc_lock(q); |
2279 | bool contended = qdisc_is_running(q); | 2312 | bool contended; |
2280 | int rc; | 2313 | int rc; |
2281 | 2314 | ||
2315 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
2316 | qdisc_calculate_pkt_len(skb, q); | ||
2282 | /* | 2317 | /* |
2283 | * Heuristic to force contended enqueues to serialize on a | 2318 | * Heuristic to force contended enqueues to serialize on a |
2284 | * separate lock before trying to get qdisc main lock. | 2319 | * separate lock before trying to get qdisc main lock. |
2285 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often | 2320 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often |
2286 | * and dequeue packets faster. | 2321 | * and dequeue packets faster. |
2287 | */ | 2322 | */ |
2323 | contended = qdisc_is_running(q); | ||
2288 | if (unlikely(contended)) | 2324 | if (unlikely(contended)) |
2289 | spin_lock(&q->busylock); | 2325 | spin_lock(&q->busylock); |
2290 | 2326 | ||
@@ -2302,7 +2338,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2302 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | 2338 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) |
2303 | skb_dst_force(skb); | 2339 | skb_dst_force(skb); |
2304 | 2340 | ||
2305 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
2306 | qdisc_bstats_update(q, skb); | 2341 | qdisc_bstats_update(q, skb); |
2307 | 2342 | ||
2308 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { | 2343 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { |
@@ -2317,7 +2352,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2317 | rc = NET_XMIT_SUCCESS; | 2352 | rc = NET_XMIT_SUCCESS; |
2318 | } else { | 2353 | } else { |
2319 | skb_dst_force(skb); | 2354 | skb_dst_force(skb); |
2320 | rc = qdisc_enqueue_root(skb, q); | 2355 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; |
2321 | if (qdisc_run_begin(q)) { | 2356 | if (qdisc_run_begin(q)) { |
2322 | if (unlikely(contended)) { | 2357 | if (unlikely(contended)) { |
2323 | spin_unlock(&q->busylock); | 2358 | spin_unlock(&q->busylock); |
@@ -2536,6 +2571,54 @@ EXPORT_SYMBOL(__skb_get_rxhash); | |||
2536 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; | 2571 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; |
2537 | EXPORT_SYMBOL(rps_sock_flow_table); | 2572 | EXPORT_SYMBOL(rps_sock_flow_table); |
2538 | 2573 | ||
2574 | static struct rps_dev_flow * | ||
2575 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
2576 | struct rps_dev_flow *rflow, u16 next_cpu) | ||
2577 | { | ||
2578 | u16 tcpu; | ||
2579 | |||
2580 | tcpu = rflow->cpu = next_cpu; | ||
2581 | if (tcpu != RPS_NO_CPU) { | ||
2582 | #ifdef CONFIG_RFS_ACCEL | ||
2583 | struct netdev_rx_queue *rxqueue; | ||
2584 | struct rps_dev_flow_table *flow_table; | ||
2585 | struct rps_dev_flow *old_rflow; | ||
2586 | u32 flow_id; | ||
2587 | u16 rxq_index; | ||
2588 | int rc; | ||
2589 | |||
2590 | /* Should we steer this flow to a different hardware queue? */ | ||
2591 | if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || | ||
2592 | !(dev->features & NETIF_F_NTUPLE)) | ||
2593 | goto out; | ||
2594 | rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); | ||
2595 | if (rxq_index == skb_get_rx_queue(skb)) | ||
2596 | goto out; | ||
2597 | |||
2598 | rxqueue = dev->_rx + rxq_index; | ||
2599 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2600 | if (!flow_table) | ||
2601 | goto out; | ||
2602 | flow_id = skb->rxhash & flow_table->mask; | ||
2603 | rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, | ||
2604 | rxq_index, flow_id); | ||
2605 | if (rc < 0) | ||
2606 | goto out; | ||
2607 | old_rflow = rflow; | ||
2608 | rflow = &flow_table->flows[flow_id]; | ||
2609 | rflow->cpu = next_cpu; | ||
2610 | rflow->filter = rc; | ||
2611 | if (old_rflow->filter == rflow->filter) | ||
2612 | old_rflow->filter = RPS_NO_FILTER; | ||
2613 | out: | ||
2614 | #endif | ||
2615 | rflow->last_qtail = | ||
2616 | per_cpu(softnet_data, tcpu).input_queue_head; | ||
2617 | } | ||
2618 | |||
2619 | return rflow; | ||
2620 | } | ||
2621 | |||
2539 | /* | 2622 | /* |
2540 | * get_rps_cpu is called from netif_receive_skb and returns the target | 2623 | * get_rps_cpu is called from netif_receive_skb and returns the target |
2541 | * CPU from the RPS map of the receiving queue for a given skb. | 2624 | * CPU from the RPS map of the receiving queue for a given skb. |
@@ -2607,12 +2690,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
2607 | if (unlikely(tcpu != next_cpu) && | 2690 | if (unlikely(tcpu != next_cpu) && |
2608 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || | 2691 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || |
2609 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - | 2692 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - |
2610 | rflow->last_qtail)) >= 0)) { | 2693 | rflow->last_qtail)) >= 0)) |
2611 | tcpu = rflow->cpu = next_cpu; | 2694 | rflow = set_rps_cpu(dev, skb, rflow, next_cpu); |
2612 | if (tcpu != RPS_NO_CPU) | 2695 | |
2613 | rflow->last_qtail = per_cpu(softnet_data, | ||
2614 | tcpu).input_queue_head; | ||
2615 | } | ||
2616 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { | 2696 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { |
2617 | *rflowp = rflow; | 2697 | *rflowp = rflow; |
2618 | cpu = tcpu; | 2698 | cpu = tcpu; |
@@ -2633,6 +2713,46 @@ done: | |||
2633 | return cpu; | 2713 | return cpu; |
2634 | } | 2714 | } |
2635 | 2715 | ||
2716 | #ifdef CONFIG_RFS_ACCEL | ||
2717 | |||
2718 | /** | ||
2719 | * rps_may_expire_flow - check whether an RFS hardware filter may be removed | ||
2720 | * @dev: Device on which the filter was set | ||
2721 | * @rxq_index: RX queue index | ||
2722 | * @flow_id: Flow ID passed to ndo_rx_flow_steer() | ||
2723 | * @filter_id: Filter ID returned by ndo_rx_flow_steer() | ||
2724 | * | ||
2725 | * Drivers that implement ndo_rx_flow_steer() should periodically call | ||
2726 | * this function for each installed filter and remove the filters for | ||
2727 | * which it returns %true. | ||
2728 | */ | ||
2729 | bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, | ||
2730 | u32 flow_id, u16 filter_id) | ||
2731 | { | ||
2732 | struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; | ||
2733 | struct rps_dev_flow_table *flow_table; | ||
2734 | struct rps_dev_flow *rflow; | ||
2735 | bool expire = true; | ||
2736 | int cpu; | ||
2737 | |||
2738 | rcu_read_lock(); | ||
2739 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2740 | if (flow_table && flow_id <= flow_table->mask) { | ||
2741 | rflow = &flow_table->flows[flow_id]; | ||
2742 | cpu = ACCESS_ONCE(rflow->cpu); | ||
2743 | if (rflow->filter == filter_id && cpu != RPS_NO_CPU && | ||
2744 | ((int)(per_cpu(softnet_data, cpu).input_queue_head - | ||
2745 | rflow->last_qtail) < | ||
2746 | (int)(10 * flow_table->mask))) | ||
2747 | expire = false; | ||
2748 | } | ||
2749 | rcu_read_unlock(); | ||
2750 | return expire; | ||
2751 | } | ||
2752 | EXPORT_SYMBOL(rps_may_expire_flow); | ||
2753 | |||
2754 | #endif /* CONFIG_RFS_ACCEL */ | ||
2755 | |||
2636 | /* Called from hardirq (IPI) context */ | 2756 | /* Called from hardirq (IPI) context */ |
2637 | static void rps_trigger_softirq(void *data) | 2757 | static void rps_trigger_softirq(void *data) |
2638 | { | 2758 | { |
@@ -2850,8 +2970,8 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); | |||
2850 | * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions | 2970 | * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions |
2851 | * a compare and 2 stores extra right now if we dont have it on | 2971 | * a compare and 2 stores extra right now if we dont have it on |
2852 | * but have CONFIG_NET_CLS_ACT | 2972 | * but have CONFIG_NET_CLS_ACT |
2853 | * NOTE: This doesnt stop any functionality; if you dont have | 2973 | * NOTE: This doesn't stop any functionality; if you dont have |
2854 | * the ingress scheduler, you just cant add policies on ingress. | 2974 | * the ingress scheduler, you just can't add policies on ingress. |
2855 | * | 2975 | * |
2856 | */ | 2976 | */ |
2857 | static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) | 2977 | static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) |
@@ -2920,6 +3040,8 @@ out: | |||
2920 | * on a failure. | 3040 | * on a failure. |
2921 | * | 3041 | * |
2922 | * The caller must hold the rtnl_mutex. | 3042 | * The caller must hold the rtnl_mutex. |
3043 | * | ||
3044 | * For a general description of rx_handler, see enum rx_handler_result. | ||
2923 | */ | 3045 | */ |
2924 | int netdev_rx_handler_register(struct net_device *dev, | 3046 | int netdev_rx_handler_register(struct net_device *dev, |
2925 | rx_handler_func_t *rx_handler, | 3047 | rx_handler_func_t *rx_handler, |
@@ -2954,64 +3076,32 @@ void netdev_rx_handler_unregister(struct net_device *dev) | |||
2954 | } | 3076 | } |
2955 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); | 3077 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); |
2956 | 3078 | ||
2957 | static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, | 3079 | static void vlan_on_bond_hook(struct sk_buff *skb) |
2958 | struct net_device *master) | ||
2959 | { | ||
2960 | if (skb->pkt_type == PACKET_HOST) { | ||
2961 | u16 *dest = (u16 *) eth_hdr(skb)->h_dest; | ||
2962 | |||
2963 | memcpy(dest, master->dev_addr, ETH_ALEN); | ||
2964 | } | ||
2965 | } | ||
2966 | |||
2967 | /* On bonding slaves other than the currently active slave, suppress | ||
2968 | * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and | ||
2969 | * ARP on active-backup slaves with arp_validate enabled. | ||
2970 | */ | ||
2971 | int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) | ||
2972 | { | 3080 | { |
2973 | struct net_device *dev = skb->dev; | 3081 | /* |
2974 | 3082 | * Make sure ARP frames received on VLAN interfaces stacked on | |
2975 | if (master->priv_flags & IFF_MASTER_ARPMON) | 3083 | * bonding interfaces still make their way to any base bonding |
2976 | dev->last_rx = jiffies; | 3084 | * device that may have registered for a specific ptype. |
2977 | 3085 | */ | |
2978 | if ((master->priv_flags & IFF_MASTER_ALB) && | 3086 | if (skb->dev->priv_flags & IFF_802_1Q_VLAN && |
2979 | (master->priv_flags & IFF_BRIDGE_PORT)) { | 3087 | vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING && |
2980 | /* Do address unmangle. The local destination address | 3088 | skb->protocol == htons(ETH_P_ARP)) { |
2981 | * will be always the one master has. Provides the right | 3089 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
2982 | * functionality in a bridge. | ||
2983 | */ | ||
2984 | skb_bond_set_mac_by_master(skb, master); | ||
2985 | } | ||
2986 | |||
2987 | if (dev->priv_flags & IFF_SLAVE_INACTIVE) { | ||
2988 | if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && | ||
2989 | skb->protocol == __cpu_to_be16(ETH_P_ARP)) | ||
2990 | return 0; | ||
2991 | |||
2992 | if (master->priv_flags & IFF_MASTER_ALB) { | ||
2993 | if (skb->pkt_type != PACKET_BROADCAST && | ||
2994 | skb->pkt_type != PACKET_MULTICAST) | ||
2995 | return 0; | ||
2996 | } | ||
2997 | if (master->priv_flags & IFF_MASTER_8023AD && | ||
2998 | skb->protocol == __cpu_to_be16(ETH_P_SLOW)) | ||
2999 | return 0; | ||
3000 | 3090 | ||
3001 | return 1; | 3091 | if (!skb2) |
3092 | return; | ||
3093 | skb2->dev = vlan_dev_real_dev(skb->dev); | ||
3094 | netif_rx(skb2); | ||
3002 | } | 3095 | } |
3003 | return 0; | ||
3004 | } | 3096 | } |
3005 | EXPORT_SYMBOL(__skb_bond_should_drop); | ||
3006 | 3097 | ||
3007 | static int __netif_receive_skb(struct sk_buff *skb) | 3098 | static int __netif_receive_skb(struct sk_buff *skb) |
3008 | { | 3099 | { |
3009 | struct packet_type *ptype, *pt_prev; | 3100 | struct packet_type *ptype, *pt_prev; |
3010 | rx_handler_func_t *rx_handler; | 3101 | rx_handler_func_t *rx_handler; |
3011 | struct net_device *orig_dev; | 3102 | struct net_device *orig_dev; |
3012 | struct net_device *master; | 3103 | struct net_device *null_or_dev; |
3013 | struct net_device *null_or_orig; | 3104 | bool deliver_exact = false; |
3014 | struct net_device *orig_or_bond; | ||
3015 | int ret = NET_RX_DROP; | 3105 | int ret = NET_RX_DROP; |
3016 | __be16 type; | 3106 | __be16 type; |
3017 | 3107 | ||
@@ -3026,28 +3116,8 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
3026 | 3116 | ||
3027 | if (!skb->skb_iif) | 3117 | if (!skb->skb_iif) |
3028 | skb->skb_iif = skb->dev->ifindex; | 3118 | skb->skb_iif = skb->dev->ifindex; |
3029 | |||
3030 | /* | ||
3031 | * bonding note: skbs received on inactive slaves should only | ||
3032 | * be delivered to pkt handlers that are exact matches. Also | ||
3033 | * the deliver_no_wcard flag will be set. If packet handlers | ||
3034 | * are sensitive to duplicate packets these skbs will need to | ||
3035 | * be dropped at the handler. | ||
3036 | */ | ||
3037 | null_or_orig = NULL; | ||
3038 | orig_dev = skb->dev; | 3119 | orig_dev = skb->dev; |
3039 | master = ACCESS_ONCE(orig_dev->master); | ||
3040 | if (skb->deliver_no_wcard) | ||
3041 | null_or_orig = orig_dev; | ||
3042 | else if (master) { | ||
3043 | if (skb_bond_should_drop(skb, master)) { | ||
3044 | skb->deliver_no_wcard = 1; | ||
3045 | null_or_orig = orig_dev; /* deliver only exact match */ | ||
3046 | } else | ||
3047 | skb->dev = master; | ||
3048 | } | ||
3049 | 3120 | ||
3050 | __this_cpu_inc(softnet_data.processed); | ||
3051 | skb_reset_network_header(skb); | 3121 | skb_reset_network_header(skb); |
3052 | skb_reset_transport_header(skb); | 3122 | skb_reset_transport_header(skb); |
3053 | skb->mac_len = skb->network_header - skb->mac_header; | 3123 | skb->mac_len = skb->network_header - skb->mac_header; |
@@ -3056,6 +3126,10 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
3056 | 3126 | ||
3057 | rcu_read_lock(); | 3127 | rcu_read_lock(); |
3058 | 3128 | ||
3129 | another_round: | ||
3130 | |||
3131 | __this_cpu_inc(softnet_data.processed); | ||
3132 | |||
3059 | #ifdef CONFIG_NET_CLS_ACT | 3133 | #ifdef CONFIG_NET_CLS_ACT |
3060 | if (skb->tc_verd & TC_NCLS) { | 3134 | if (skb->tc_verd & TC_NCLS) { |
3061 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | 3135 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
@@ -3064,8 +3138,7 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
3064 | #endif | 3138 | #endif |
3065 | 3139 | ||
3066 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 3140 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
3067 | if (ptype->dev == null_or_orig || ptype->dev == skb->dev || | 3141 | if (!ptype->dev || ptype->dev == skb->dev) { |
3068 | ptype->dev == orig_dev) { | ||
3069 | if (pt_prev) | 3142 | if (pt_prev) |
3070 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3143 | ret = deliver_skb(skb, pt_prev, orig_dev); |
3071 | pt_prev = ptype; | 3144 | pt_prev = ptype; |
@@ -3079,16 +3152,24 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
3079 | ncls: | 3152 | ncls: |
3080 | #endif | 3153 | #endif |
3081 | 3154 | ||
3082 | /* Handle special case of bridge or macvlan */ | ||
3083 | rx_handler = rcu_dereference(skb->dev->rx_handler); | 3155 | rx_handler = rcu_dereference(skb->dev->rx_handler); |
3084 | if (rx_handler) { | 3156 | if (rx_handler) { |
3085 | if (pt_prev) { | 3157 | if (pt_prev) { |
3086 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3158 | ret = deliver_skb(skb, pt_prev, orig_dev); |
3087 | pt_prev = NULL; | 3159 | pt_prev = NULL; |
3088 | } | 3160 | } |
3089 | skb = rx_handler(skb); | 3161 | switch (rx_handler(&skb)) { |
3090 | if (!skb) | 3162 | case RX_HANDLER_CONSUMED: |
3091 | goto out; | 3163 | goto out; |
3164 | case RX_HANDLER_ANOTHER: | ||
3165 | goto another_round; | ||
3166 | case RX_HANDLER_EXACT: | ||
3167 | deliver_exact = true; | ||
3168 | case RX_HANDLER_PASS: | ||
3169 | break; | ||
3170 | default: | ||
3171 | BUG(); | ||
3172 | } | ||
3092 | } | 3173 | } |
3093 | 3174 | ||
3094 | if (vlan_tx_tag_present(skb)) { | 3175 | if (vlan_tx_tag_present(skb)) { |
@@ -3103,24 +3184,17 @@ ncls: | |||
3103 | goto out; | 3184 | goto out; |
3104 | } | 3185 | } |
3105 | 3186 | ||
3106 | /* | 3187 | vlan_on_bond_hook(skb); |
3107 | * Make sure frames received on VLAN interfaces stacked on | 3188 | |
3108 | * bonding interfaces still make their way to any base bonding | 3189 | /* deliver only exact match when indicated */ |
3109 | * device that may have registered for a specific ptype. The | 3190 | null_or_dev = deliver_exact ? skb->dev : NULL; |
3110 | * handler may have to adjust skb->dev and orig_dev. | ||
3111 | */ | ||
3112 | orig_or_bond = orig_dev; | ||
3113 | if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && | ||
3114 | (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { | ||
3115 | orig_or_bond = vlan_dev_real_dev(skb->dev); | ||
3116 | } | ||
3117 | 3191 | ||
3118 | type = skb->protocol; | 3192 | type = skb->protocol; |
3119 | list_for_each_entry_rcu(ptype, | 3193 | list_for_each_entry_rcu(ptype, |
3120 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { | 3194 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { |
3121 | if (ptype->type == type && (ptype->dev == null_or_orig || | 3195 | if (ptype->type == type && |
3122 | ptype->dev == skb->dev || ptype->dev == orig_dev || | 3196 | (ptype->dev == null_or_dev || ptype->dev == skb->dev || |
3123 | ptype->dev == orig_or_bond)) { | 3197 | ptype->dev == orig_dev)) { |
3124 | if (pt_prev) | 3198 | if (pt_prev) |
3125 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3199 | ret = deliver_skb(skb, pt_prev, orig_dev); |
3126 | pt_prev = ptype; | 3200 | pt_prev = ptype; |
@@ -3726,7 +3800,7 @@ static void net_rx_action(struct softirq_action *h) | |||
3726 | * with netpoll's poll_napi(). Only the entity which | 3800 | * with netpoll's poll_napi(). Only the entity which |
3727 | * obtains the lock and sees NAPI_STATE_SCHED set will | 3801 | * obtains the lock and sees NAPI_STATE_SCHED set will |
3728 | * actually make the ->poll() call. Therefore we avoid | 3802 | * actually make the ->poll() call. Therefore we avoid |
3729 | * accidently calling ->poll() when NAPI is not scheduled. | 3803 | * accidentally calling ->poll() when NAPI is not scheduled. |
3730 | */ | 3804 | */ |
3731 | work = 0; | 3805 | work = 0; |
3732 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { | 3806 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { |
@@ -3917,12 +3991,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) | |||
3917 | 3991 | ||
3918 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 3992 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
3919 | { | 3993 | { |
3920 | struct net_device *dev = (v == SEQ_START_TOKEN) ? | 3994 | struct net_device *dev = v; |
3921 | first_net_device(seq_file_net(seq)) : | 3995 | |
3922 | next_net_device((struct net_device *)v); | 3996 | if (v == SEQ_START_TOKEN) |
3997 | dev = first_net_device_rcu(seq_file_net(seq)); | ||
3998 | else | ||
3999 | dev = next_net_device_rcu(dev); | ||
3923 | 4000 | ||
3924 | ++*pos; | 4001 | ++*pos; |
3925 | return rcu_dereference(dev); | 4002 | return dev; |
3926 | } | 4003 | } |
3927 | 4004 | ||
3928 | void dev_seq_stop(struct seq_file *seq, void *v) | 4005 | void dev_seq_stop(struct seq_file *seq, void *v) |
@@ -4206,15 +4283,14 @@ static int __init dev_proc_init(void) | |||
4206 | 4283 | ||
4207 | 4284 | ||
4208 | /** | 4285 | /** |
4209 | * netdev_set_master - set up master/slave pair | 4286 | * netdev_set_master - set up master pointer |
4210 | * @slave: slave device | 4287 | * @slave: slave device |
4211 | * @master: new master device | 4288 | * @master: new master device |
4212 | * | 4289 | * |
4213 | * Changes the master device of the slave. Pass %NULL to break the | 4290 | * Changes the master device of the slave. Pass %NULL to break the |
4214 | * bonding. The caller must hold the RTNL semaphore. On a failure | 4291 | * bonding. The caller must hold the RTNL semaphore. On a failure |
4215 | * a negative errno code is returned. On success the reference counts | 4292 | * a negative errno code is returned. On success the reference counts |
4216 | * are adjusted, %RTM_NEWLINK is sent to the routing socket and the | 4293 | * are adjusted and the function returns zero. |
4217 | * function returns zero. | ||
4218 | */ | 4294 | */ |
4219 | int netdev_set_master(struct net_device *slave, struct net_device *master) | 4295 | int netdev_set_master(struct net_device *slave, struct net_device *master) |
4220 | { | 4296 | { |
@@ -4234,6 +4310,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) | |||
4234 | synchronize_net(); | 4310 | synchronize_net(); |
4235 | dev_put(old); | 4311 | dev_put(old); |
4236 | } | 4312 | } |
4313 | return 0; | ||
4314 | } | ||
4315 | EXPORT_SYMBOL(netdev_set_master); | ||
4316 | |||
4317 | /** | ||
4318 | * netdev_set_bond_master - set up bonding master/slave pair | ||
4319 | * @slave: slave device | ||
4320 | * @master: new master device | ||
4321 | * | ||
4322 | * Changes the master device of the slave. Pass %NULL to break the | ||
4323 | * bonding. The caller must hold the RTNL semaphore. On a failure | ||
4324 | * a negative errno code is returned. On success %RTM_NEWLINK is sent | ||
4325 | * to the routing socket and the function returns zero. | ||
4326 | */ | ||
4327 | int netdev_set_bond_master(struct net_device *slave, struct net_device *master) | ||
4328 | { | ||
4329 | int err; | ||
4330 | |||
4331 | ASSERT_RTNL(); | ||
4332 | |||
4333 | err = netdev_set_master(slave, master); | ||
4334 | if (err) | ||
4335 | return err; | ||
4237 | if (master) | 4336 | if (master) |
4238 | slave->flags |= IFF_SLAVE; | 4337 | slave->flags |= IFF_SLAVE; |
4239 | else | 4338 | else |
@@ -4242,7 +4341,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) | |||
4242 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); | 4341 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); |
4243 | return 0; | 4342 | return 0; |
4244 | } | 4343 | } |
4245 | EXPORT_SYMBOL(netdev_set_master); | 4344 | EXPORT_SYMBOL(netdev_set_bond_master); |
4246 | 4345 | ||
4247 | static void dev_change_rx_flags(struct net_device *dev, int flags) | 4346 | static void dev_change_rx_flags(struct net_device *dev, int flags) |
4248 | { | 4347 | { |
@@ -4579,6 +4678,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) | |||
4579 | EXPORT_SYMBOL(dev_set_mtu); | 4678 | EXPORT_SYMBOL(dev_set_mtu); |
4580 | 4679 | ||
4581 | /** | 4680 | /** |
4681 | * dev_set_group - Change group this device belongs to | ||
4682 | * @dev: device | ||
4683 | * @new_group: group this device should belong to | ||
4684 | */ | ||
4685 | void dev_set_group(struct net_device *dev, int new_group) | ||
4686 | { | ||
4687 | dev->group = new_group; | ||
4688 | } | ||
4689 | EXPORT_SYMBOL(dev_set_group); | ||
4690 | |||
4691 | /** | ||
4582 | * dev_set_mac_address - Change Media Access Control Address | 4692 | * dev_set_mac_address - Change Media Access Control Address |
4583 | * @dev: device | 4693 | * @dev: device |
4584 | * @sa: new address | 4694 | * @sa: new address |
@@ -4663,7 +4773,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm | |||
4663 | * is never reached | 4773 | * is never reached |
4664 | */ | 4774 | */ |
4665 | WARN_ON(1); | 4775 | WARN_ON(1); |
4666 | err = -EINVAL; | 4776 | err = -ENOTTY; |
4667 | break; | 4777 | break; |
4668 | 4778 | ||
4669 | } | 4779 | } |
@@ -4931,7 +5041,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
4931 | /* Set the per device memory buffer space. | 5041 | /* Set the per device memory buffer space. |
4932 | * Not applicable in our case */ | 5042 | * Not applicable in our case */ |
4933 | case SIOCSIFLINK: | 5043 | case SIOCSIFLINK: |
4934 | return -EINVAL; | 5044 | return -ENOTTY; |
4935 | 5045 | ||
4936 | /* | 5046 | /* |
4937 | * Unknown or private ioctl. | 5047 | * Unknown or private ioctl. |
@@ -4952,7 +5062,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
4952 | /* Take care of Wireless Extensions */ | 5062 | /* Take care of Wireless Extensions */ |
4953 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) | 5063 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) |
4954 | return wext_handle_ioctl(net, &ifr, cmd, arg); | 5064 | return wext_handle_ioctl(net, &ifr, cmd, arg); |
4955 | return -EINVAL; | 5065 | return -ENOTTY; |
4956 | } | 5066 | } |
4957 | } | 5067 | } |
4958 | 5068 | ||
@@ -5069,41 +5179,59 @@ static void rollback_registered(struct net_device *dev) | |||
5069 | list_del(&single); | 5179 | list_del(&single); |
5070 | } | 5180 | } |
5071 | 5181 | ||
5072 | unsigned long netdev_fix_features(unsigned long features, const char *name) | 5182 | u32 netdev_fix_features(struct net_device *dev, u32 features) |
5073 | { | 5183 | { |
5184 | /* Fix illegal checksum combinations */ | ||
5185 | if ((features & NETIF_F_HW_CSUM) && | ||
5186 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | ||
5187 | netdev_info(dev, "mixed HW and IP checksum settings.\n"); | ||
5188 | features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); | ||
5189 | } | ||
5190 | |||
5191 | if ((features & NETIF_F_NO_CSUM) && | ||
5192 | (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | ||
5193 | netdev_info(dev, "mixed no checksumming and other settings.\n"); | ||
5194 | features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); | ||
5195 | } | ||
5196 | |||
5074 | /* Fix illegal SG+CSUM combinations. */ | 5197 | /* Fix illegal SG+CSUM combinations. */ |
5075 | if ((features & NETIF_F_SG) && | 5198 | if ((features & NETIF_F_SG) && |
5076 | !(features & NETIF_F_ALL_CSUM)) { | 5199 | !(features & NETIF_F_ALL_CSUM)) { |
5077 | if (name) | 5200 | netdev_info(dev, |
5078 | printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " | 5201 | "Dropping NETIF_F_SG since no checksum feature.\n"); |
5079 | "checksum feature.\n", name); | ||
5080 | features &= ~NETIF_F_SG; | 5202 | features &= ~NETIF_F_SG; |
5081 | } | 5203 | } |
5082 | 5204 | ||
5083 | /* TSO requires that SG is present as well. */ | 5205 | /* TSO requires that SG is present as well. */ |
5084 | if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { | 5206 | if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { |
5085 | if (name) | 5207 | netdev_info(dev, "Dropping TSO features since no SG feature.\n"); |
5086 | printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " | 5208 | features &= ~NETIF_F_ALL_TSO; |
5087 | "SG feature.\n", name); | 5209 | } |
5088 | features &= ~NETIF_F_TSO; | 5210 | |
5211 | /* TSO ECN requires that TSO is present as well. */ | ||
5212 | if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) | ||
5213 | features &= ~NETIF_F_TSO_ECN; | ||
5214 | |||
5215 | /* Software GSO depends on SG. */ | ||
5216 | if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { | ||
5217 | netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); | ||
5218 | features &= ~NETIF_F_GSO; | ||
5089 | } | 5219 | } |
5090 | 5220 | ||
5221 | /* UFO needs SG and checksumming */ | ||
5091 | if (features & NETIF_F_UFO) { | 5222 | if (features & NETIF_F_UFO) { |
5092 | /* maybe split UFO into V4 and V6? */ | 5223 | /* maybe split UFO into V4 and V6? */ |
5093 | if (!((features & NETIF_F_GEN_CSUM) || | 5224 | if (!((features & NETIF_F_GEN_CSUM) || |
5094 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) | 5225 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) |
5095 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5226 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
5096 | if (name) | 5227 | netdev_info(dev, |
5097 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO " | 5228 | "Dropping NETIF_F_UFO since no checksum offload features.\n"); |
5098 | "since no checksum offload features.\n", | ||
5099 | name); | ||
5100 | features &= ~NETIF_F_UFO; | 5229 | features &= ~NETIF_F_UFO; |
5101 | } | 5230 | } |
5102 | 5231 | ||
5103 | if (!(features & NETIF_F_SG)) { | 5232 | if (!(features & NETIF_F_SG)) { |
5104 | if (name) | 5233 | netdev_info(dev, |
5105 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO " | 5234 | "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); |
5106 | "since no NETIF_F_SG feature.\n", name); | ||
5107 | features &= ~NETIF_F_UFO; | 5235 | features &= ~NETIF_F_UFO; |
5108 | } | 5236 | } |
5109 | } | 5237 | } |
@@ -5112,6 +5240,37 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) | |||
5112 | } | 5240 | } |
5113 | EXPORT_SYMBOL(netdev_fix_features); | 5241 | EXPORT_SYMBOL(netdev_fix_features); |
5114 | 5242 | ||
5243 | void netdev_update_features(struct net_device *dev) | ||
5244 | { | ||
5245 | u32 features; | ||
5246 | int err = 0; | ||
5247 | |||
5248 | features = netdev_get_wanted_features(dev); | ||
5249 | |||
5250 | if (dev->netdev_ops->ndo_fix_features) | ||
5251 | features = dev->netdev_ops->ndo_fix_features(dev, features); | ||
5252 | |||
5253 | /* driver might be less strict about feature dependencies */ | ||
5254 | features = netdev_fix_features(dev, features); | ||
5255 | |||
5256 | if (dev->features == features) | ||
5257 | return; | ||
5258 | |||
5259 | netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", | ||
5260 | dev->features, features); | ||
5261 | |||
5262 | if (dev->netdev_ops->ndo_set_features) | ||
5263 | err = dev->netdev_ops->ndo_set_features(dev, features); | ||
5264 | |||
5265 | if (!err) | ||
5266 | dev->features = features; | ||
5267 | else if (err < 0) | ||
5268 | netdev_err(dev, | ||
5269 | "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", | ||
5270 | err, features, dev->features); | ||
5271 | } | ||
5272 | EXPORT_SYMBOL(netdev_update_features); | ||
5273 | |||
5115 | /** | 5274 | /** |
5116 | * netif_stacked_transfer_operstate - transfer operstate | 5275 | * netif_stacked_transfer_operstate - transfer operstate |
5117 | * @rootdev: the root or lower level device to transfer state from | 5276 | * @rootdev: the root or lower level device to transfer state from |
@@ -5246,27 +5405,19 @@ int register_netdevice(struct net_device *dev) | |||
5246 | if (dev->iflink == -1) | 5405 | if (dev->iflink == -1) |
5247 | dev->iflink = dev->ifindex; | 5406 | dev->iflink = dev->ifindex; |
5248 | 5407 | ||
5249 | /* Fix illegal checksum combinations */ | 5408 | /* Transfer changeable features to wanted_features and enable |
5250 | if ((dev->features & NETIF_F_HW_CSUM) && | 5409 | * software offloads (GSO and GRO). |
5251 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5410 | */ |
5252 | printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", | 5411 | dev->hw_features |= NETIF_F_SOFT_FEATURES; |
5253 | dev->name); | 5412 | dev->features |= NETIF_F_SOFT_FEATURES; |
5254 | dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); | 5413 | dev->wanted_features = dev->features & dev->hw_features; |
5255 | } | ||
5256 | 5414 | ||
5257 | if ((dev->features & NETIF_F_NO_CSUM) && | 5415 | /* Avoid warning from netdev_fix_features() for GSO without SG */ |
5258 | (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5416 | if (!(dev->wanted_features & NETIF_F_SG)) { |
5259 | printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", | 5417 | dev->wanted_features &= ~NETIF_F_GSO; |
5260 | dev->name); | 5418 | dev->features &= ~NETIF_F_GSO; |
5261 | dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); | ||
5262 | } | 5419 | } |
5263 | 5420 | ||
5264 | dev->features = netdev_fix_features(dev->features, dev->name); | ||
5265 | |||
5266 | /* Enable software GSO if SG is supported. */ | ||
5267 | if (dev->features & NETIF_F_SG) | ||
5268 | dev->features |= NETIF_F_GSO; | ||
5269 | |||
5270 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, | 5421 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, |
5271 | * vlan_dev_init() will do the dev->features check, so these features | 5422 | * vlan_dev_init() will do the dev->features check, so these features |
5272 | * are enabled only if supported by underlying device. | 5423 | * are enabled only if supported by underlying device. |
@@ -5283,6 +5434,8 @@ int register_netdevice(struct net_device *dev) | |||
5283 | goto err_uninit; | 5434 | goto err_uninit; |
5284 | dev->reg_state = NETREG_REGISTERED; | 5435 | dev->reg_state = NETREG_REGISTERED; |
5285 | 5436 | ||
5437 | netdev_update_features(dev); | ||
5438 | |||
5286 | /* | 5439 | /* |
5287 | * Default initial state at registry is that the | 5440 | * Default initial state at registry is that the |
5288 | * device is present. | 5441 | * device is present. |
@@ -5687,6 +5840,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
5687 | #endif | 5840 | #endif |
5688 | 5841 | ||
5689 | strcpy(dev->name, name); | 5842 | strcpy(dev->name, name); |
5843 | dev->group = INIT_NETDEV_GROUP; | ||
5690 | return dev; | 5844 | return dev; |
5691 | 5845 | ||
5692 | free_all: | 5846 | free_all: |
@@ -6001,8 +6155,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
6001 | * @one to the master device with current feature set @all. Will not | 6155 | * @one to the master device with current feature set @all. Will not |
6002 | * enable anything that is off in @mask. Returns the new feature set. | 6156 | * enable anything that is off in @mask. Returns the new feature set. |
6003 | */ | 6157 | */ |
6004 | unsigned long netdev_increment_features(unsigned long all, unsigned long one, | 6158 | u32 netdev_increment_features(u32 all, u32 one, u32 mask) |
6005 | unsigned long mask) | ||
6006 | { | 6159 | { |
6007 | /* If device needs checksumming, downgrade to it. */ | 6160 | /* If device needs checksumming, downgrade to it. */ |
6008 | if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) | 6161 | if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) |
@@ -6187,7 +6340,7 @@ static void __net_exit default_device_exit(struct net *net) | |||
6187 | if (dev->rtnl_link_ops) | 6340 | if (dev->rtnl_link_ops) |
6188 | continue; | 6341 | continue; |
6189 | 6342 | ||
6190 | /* Push remaing network devices to init_net */ | 6343 | /* Push remaining network devices to init_net */ |
6191 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); | 6344 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); |
6192 | err = dev_change_net_namespace(dev, &init_net, fb_name); | 6345 | err = dev_change_net_namespace(dev, &init_net, fb_name); |
6193 | if (err) { | 6346 | if (err) { |