about | summary | refs | log | tree | commit | diff | stats
path: root/net/core/dev.c
diff options
context:
space:
mode:
author Thomas Gleixner <tglx@linutronix.de> 2011-05-14 06:06:36 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2011-05-14 06:06:36 -0400
commit a18f22a968de17b29f2310cdb7ba69163e65ec15 (patch)
tree a7d56d88fad5e444d7661484109758a2f436129e /net/core/dev.c
parent a1c57e0fec53defe745e64417eacdbd3618c3e66 (diff)
parent 798778b8653f64b7b2162ac70eca10367cff6ce8 (diff)
Merge branch 'consolidate-clksrc-i8253' of master.kernel.org:~rmk/linux-2.6-arm into timers/clocksource
Conflicts:
	arch/ia64/kernel/cyclone.c
	arch/mips/kernel/i8253.c
	arch/x86/kernel/i8253.c

Reason: Resolve conflicts so further cleanups do not conflict further

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- net/core/dev.c 599
1 files changed, 376 insertions, 223 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc2..856b6ee9a1d5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
132#include <trace/events/skb.h> 132#include <trace/events/skb.h>
133#include <linux/pci.h> 133#include <linux/pci.h>
134#include <linux/inetdevice.h> 134#include <linux/inetdevice.h>
135#include <linux/cpu_rmap.h>
135 136
136#include "net-sysfs.h" 137#include "net-sysfs.h"
137 138
@@ -1114,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
1114void dev_load(struct net *net, const char *name) 1115void dev_load(struct net *net, const char *name)
1115{ 1116{
1116 struct net_device *dev; 1117 struct net_device *dev;
1118 int no_module;
1117 1119
1118 rcu_read_lock(); 1120 rcu_read_lock();
1119 dev = dev_get_by_name_rcu(net, name); 1121 dev = dev_get_by_name_rcu(net, name);
1120 rcu_read_unlock(); 1122 rcu_read_unlock();
1121 1123
1122 if (!dev && capable(CAP_NET_ADMIN)) 1124 no_module = !dev;
1123 request_module("%s", name); 1125 if (no_module && capable(CAP_NET_ADMIN))
1126 no_module = request_module("netdev-%s", name);
1127 if (no_module && capable(CAP_SYS_MODULE)) {
1128 if (!request_module("%s", name))
1129 pr_err("Loading kernel module for a network device "
1130"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
1131"instead\n", name);
1132 }
1124} 1133}
1125EXPORT_SYMBOL(dev_load); 1134EXPORT_SYMBOL(dev_load);
1126 1135
@@ -1131,9 +1140,6 @@ static int __dev_open(struct net_device *dev)
1131 1140
1132 ASSERT_RTNL(); 1141 ASSERT_RTNL();
1133 1142
1134 /*
1135 * Is it even present?
1136 */
1137 if (!netif_device_present(dev)) 1143 if (!netif_device_present(dev))
1138 return -ENODEV; 1144 return -ENODEV;
1139 1145
@@ -1142,9 +1148,6 @@ static int __dev_open(struct net_device *dev)
1142 if (ret) 1148 if (ret)
1143 return ret; 1149 return ret;
1144 1150
1145 /*
1146 * Call device private open method
1147 */
1148 set_bit(__LINK_STATE_START, &dev->state); 1151 set_bit(__LINK_STATE_START, &dev->state);
1149 1152
1150 if (ops->ndo_validate_addr) 1153 if (ops->ndo_validate_addr)
@@ -1153,31 +1156,12 @@ static int __dev_open(struct net_device *dev)
1153 if (!ret && ops->ndo_open) 1156 if (!ret && ops->ndo_open)
1154 ret = ops->ndo_open(dev); 1157 ret = ops->ndo_open(dev);
1155 1158
1156 /*
1157 * If it went open OK then:
1158 */
1159
1160 if (ret) 1159 if (ret)
1161 clear_bit(__LINK_STATE_START, &dev->state); 1160 clear_bit(__LINK_STATE_START, &dev->state);
1162 else { 1161 else {
1163 /*
1164 * Set the flags.
1165 */
1166 dev->flags |= IFF_UP; 1162 dev->flags |= IFF_UP;
1167
1168 /*
1169 * Enable NET_DMA
1170 */
1171 net_dmaengine_get(); 1163 net_dmaengine_get();
1172
1173 /*
1174 * Initialize multicasting status
1175 */
1176 dev_set_rx_mode(dev); 1164 dev_set_rx_mode(dev);
1177
1178 /*
1179 * Wakeup transmit queue engine
1180 */
1181 dev_activate(dev); 1165 dev_activate(dev);
1182 } 1166 }
1183 1167
@@ -1200,22 +1184,13 @@ int dev_open(struct net_device *dev)
1200{ 1184{
1201 int ret; 1185 int ret;
1202 1186
1203 /*
1204 * Is it already up?
1205 */
1206 if (dev->flags & IFF_UP) 1187 if (dev->flags & IFF_UP)
1207 return 0; 1188 return 0;
1208 1189
1209 /*
1210 * Open device
1211 */
1212 ret = __dev_open(dev); 1190 ret = __dev_open(dev);
1213 if (ret < 0) 1191 if (ret < 0)
1214 return ret; 1192 return ret;
1215 1193
1216 /*
1217 * ... and announce new interface.
1218 */
1219 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1194 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1220 call_netdevice_notifiers(NETDEV_UP, dev); 1195 call_netdevice_notifiers(NETDEV_UP, dev);
1221 1196
@@ -1231,10 +1206,6 @@ static int __dev_close_many(struct list_head *head)
1231 might_sleep(); 1206 might_sleep();
1232 1207
1233 list_for_each_entry(dev, head, unreg_list) { 1208 list_for_each_entry(dev, head, unreg_list) {
1234 /*
1235 * Tell people we are going down, so that they can
1236 * prepare to death, when device is still operating.
1237 */
1238 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 1209 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1239 1210
1240 clear_bit(__LINK_STATE_START, &dev->state); 1211 clear_bit(__LINK_STATE_START, &dev->state);
@@ -1263,15 +1234,7 @@ static int __dev_close_many(struct list_head *head)
1263 if (ops->ndo_stop) 1234 if (ops->ndo_stop)
1264 ops->ndo_stop(dev); 1235 ops->ndo_stop(dev);
1265 1236
1266 /*
1267 * Device is now down.
1268 */
1269
1270 dev->flags &= ~IFF_UP; 1237 dev->flags &= ~IFF_UP;
1271
1272 /*
1273 * Shutdown NET_DMA
1274 */
1275 net_dmaengine_put(); 1238 net_dmaengine_put();
1276 } 1239 }
1277 1240
@@ -1289,7 +1252,7 @@ static int __dev_close(struct net_device *dev)
1289 return retval; 1252 return retval;
1290} 1253}
1291 1254
1292int dev_close_many(struct list_head *head) 1255static int dev_close_many(struct list_head *head)
1293{ 1256{
1294 struct net_device *dev, *tmp; 1257 struct net_device *dev, *tmp;
1295 LIST_HEAD(tmp_list); 1258 LIST_HEAD(tmp_list);
@@ -1300,9 +1263,6 @@ int dev_close_many(struct list_head *head)
1300 1263
1301 __dev_close_many(head); 1264 __dev_close_many(head);
1302 1265
1303 /*
1304 * Tell people we are down
1305 */
1306 list_for_each_entry(dev, head, unreg_list) { 1266 list_for_each_entry(dev, head, unreg_list) {
1307 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1267 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1308 call_netdevice_notifiers(NETDEV_DOWN, dev); 1268 call_netdevice_notifiers(NETDEV_DOWN, dev);
@@ -1344,14 +1304,17 @@ EXPORT_SYMBOL(dev_close);
1344 */ 1304 */
1345void dev_disable_lro(struct net_device *dev) 1305void dev_disable_lro(struct net_device *dev)
1346{ 1306{
1347 if (dev->ethtool_ops && dev->ethtool_ops->get_flags && 1307 u32 flags;
1348 dev->ethtool_ops->set_flags) { 1308
1349 u32 flags = dev->ethtool_ops->get_flags(dev); 1309 if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
1350 if (flags & ETH_FLAG_LRO) { 1310 flags = dev->ethtool_ops->get_flags(dev);
1351 flags &= ~ETH_FLAG_LRO; 1311 else
1352 dev->ethtool_ops->set_flags(dev, flags); 1312 flags = ethtool_op_get_flags(dev);
1353 } 1313
1354 } 1314 if (!(flags & ETH_FLAG_LRO))
1315 return;
1316
1317 __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
1355 WARN_ON(dev->features & NETIF_F_LRO); 1318 WARN_ON(dev->features & NETIF_F_LRO);
1356} 1319}
1357EXPORT_SYMBOL(dev_disable_lro); 1320EXPORT_SYMBOL(dev_disable_lro);
@@ -1359,11 +1322,6 @@ EXPORT_SYMBOL(dev_disable_lro);
1359 1322
1360static int dev_boot_phase = 1; 1323static int dev_boot_phase = 1;
1361 1324
1362/*
1363 * Device change register/unregister. These are not inline or static
1364 * as we export them to the world.
1365 */
1366
1367/** 1325/**
1368 * register_netdevice_notifier - register a network notifier block 1326 * register_netdevice_notifier - register a network notifier block
1369 * @nb: notifier 1327 * @nb: notifier
@@ -1465,6 +1423,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1465 ASSERT_RTNL(); 1423 ASSERT_RTNL();
1466 return raw_notifier_call_chain(&netdev_chain, val, dev); 1424 return raw_notifier_call_chain(&netdev_chain, val, dev);
1467} 1425}
1426EXPORT_SYMBOL(call_netdevice_notifiers);
1468 1427
1469/* When > 0 there are consumers of rx skb time stamps */ 1428/* When > 0 there are consumers of rx skb time stamps */
1470static atomic_t netstamp_needed = ATOMIC_INIT(0); 1429static atomic_t netstamp_needed = ATOMIC_INIT(0);
@@ -1495,6 +1454,27 @@ static inline void net_timestamp_check(struct sk_buff *skb)
1495 __net_timestamp(skb); 1454 __net_timestamp(skb);
1496} 1455}
1497 1456
1457static inline bool is_skb_forwardable(struct net_device *dev,
1458 struct sk_buff *skb)
1459{
1460 unsigned int len;
1461
1462 if (!(dev->flags & IFF_UP))
1463 return false;
1464
1465 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1466 if (skb->len <= len)
1467 return true;
1468
1469 /* if TSO is enabled, we don't care about the length as the packet
1470 * could be forwarded without being segmented before
1471 */
1472 if (skb_is_gso(skb))
1473 return true;
1474
1475 return false;
1476}
1477
1498/** 1478/**
1499 * dev_forward_skb - loopback an skb to another netif 1479 * dev_forward_skb - loopback an skb to another netif
1500 * 1480 *
@@ -1518,8 +1498,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1518 skb_orphan(skb); 1498 skb_orphan(skb);
1519 nf_reset(skb); 1499 nf_reset(skb);
1520 1500
1521 if (unlikely(!(dev->flags & IFF_UP) || 1501 if (unlikely(!is_skb_forwardable(dev, skb))) {
1522 (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
1523 atomic_long_inc(&dev->rx_dropped); 1502 atomic_long_inc(&dev->rx_dropped);
1524 kfree_skb(skb); 1503 kfree_skb(skb);
1525 return NET_RX_DROP; 1504 return NET_RX_DROP;
@@ -1597,6 +1576,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1597 rcu_read_unlock(); 1576 rcu_read_unlock();
1598} 1577}
1599 1578
1579/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1580 * @dev: Network device
1581 * @txq: number of queues available
1582 *
1583 * If real_num_tx_queues is changed the tc mappings may no longer be
1584 * valid. To resolve this verify the tc mapping remains valid and if
1585 * not NULL the mapping. With no priorities mapping to this
1586 * offset/count pair it will no longer be used. In the worst case TC0
1587 * is invalid nothing can be done so disable priority mappings. If is
1588 * expected that drivers will fix this mapping if they can before
1589 * calling netif_set_real_num_tx_queues.
1590 */
1591static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1592{
1593 int i;
1594 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1595
1596 /* If TC0 is invalidated disable TC mapping */
1597 if (tc->offset + tc->count > txq) {
1598 pr_warning("Number of in use tx queues changed "
1599 "invalidating tc mappings. Priority "
1600 "traffic classification disabled!\n");
1601 dev->num_tc = 0;
1602 return;
1603 }
1604
1605 /* Invalidated prio to tc mappings set to TC0 */
1606 for (i = 1; i < TC_BITMASK + 1; i++) {
1607 int q = netdev_get_prio_tc_map(dev, i);
1608
1609 tc = &dev->tc_to_txq[q];
1610 if (tc->offset + tc->count > txq) {
1611 pr_warning("Number of in use tx queues "
1612 "changed. Priority %i to tc "
1613 "mapping %i is no longer valid "
1614 "setting map to 0\n",
1615 i, q);
1616 netdev_set_prio_tc_map(dev, i, 0);
1617 }
1618 }
1619}
1620
1600/* 1621/*
1601 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 1622 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1602 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 1623 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1608,7 +1629,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1608 if (txq < 1 || txq > dev->num_tx_queues) 1629 if (txq < 1 || txq > dev->num_tx_queues)
1609 return -EINVAL; 1630 return -EINVAL;
1610 1631
1611 if (dev->reg_state == NETREG_REGISTERED) { 1632 if (dev->reg_state == NETREG_REGISTERED ||
1633 dev->reg_state == NETREG_UNREGISTERING) {
1612 ASSERT_RTNL(); 1634 ASSERT_RTNL();
1613 1635
1614 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, 1636 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -1616,6 +1638,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1616 if (rc) 1638 if (rc)
1617 return rc; 1639 return rc;
1618 1640
1641 if (dev->num_tc)
1642 netif_setup_tc(dev, txq);
1643
1619 if (txq < dev->real_num_tx_queues) 1644 if (txq < dev->real_num_tx_queues)
1620 qdisc_reset_all_tx_gt(dev, txq); 1645 qdisc_reset_all_tx_gt(dev, txq);
1621 } 1646 }
@@ -1815,7 +1840,7 @@ EXPORT_SYMBOL(skb_checksum_help);
1815 * It may return NULL if the skb requires no segmentation. This is 1840 * It may return NULL if the skb requires no segmentation. This is
1816 * only possible when GSO is used for verifying header integrity. 1841 * only possible when GSO is used for verifying header integrity.
1817 */ 1842 */
1818struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) 1843struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
1819{ 1844{
1820 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 1845 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1821 struct packet_type *ptype; 1846 struct packet_type *ptype;
@@ -2003,7 +2028,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
2003 protocol == htons(ETH_P_FCOE))); 2028 protocol == htons(ETH_P_FCOE)));
2004} 2029}
2005 2030
2006static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) 2031static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
2007{ 2032{
2008 if (!can_checksum_protocol(features, protocol)) { 2033 if (!can_checksum_protocol(features, protocol)) {
2009 features &= ~NETIF_F_ALL_CSUM; 2034 features &= ~NETIF_F_ALL_CSUM;
@@ -2015,10 +2040,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features
2015 return features; 2040 return features;
2016} 2041}
2017 2042
2018int netif_skb_features(struct sk_buff *skb) 2043u32 netif_skb_features(struct sk_buff *skb)
2019{ 2044{
2020 __be16 protocol = skb->protocol; 2045 __be16 protocol = skb->protocol;
2021 int features = skb->dev->features; 2046 u32 features = skb->dev->features;
2022 2047
2023 if (protocol == htons(ETH_P_8021Q)) { 2048 if (protocol == htons(ETH_P_8021Q)) {
2024 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2049 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2063,10 +2088,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2063 int rc = NETDEV_TX_OK; 2088 int rc = NETDEV_TX_OK;
2064 2089
2065 if (likely(!skb->next)) { 2090 if (likely(!skb->next)) {
2066 int features; 2091 u32 features;
2067 2092
2068 /* 2093 /*
2069 * If device doesnt need skb->dst, release it right now while 2094 * If device doesn't need skb->dst, release it right now while
2070 * its hot in this cpu cache 2095 * its hot in this cpu cache
2071 */ 2096 */
2072 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2097 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
@@ -2126,7 +2151,7 @@ gso:
2126 nskb->next = NULL; 2151 nskb->next = NULL;
2127 2152
2128 /* 2153 /*
2129 * If device doesnt need nskb->dst, release it right now while 2154 * If device doesn't need nskb->dst, release it right now while
2130 * its hot in this cpu cache 2155 * its hot in this cpu cache
2131 */ 2156 */
2132 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2157 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
@@ -2165,6 +2190,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2165 unsigned int num_tx_queues) 2190 unsigned int num_tx_queues)
2166{ 2191{
2167 u32 hash; 2192 u32 hash;
2193 u16 qoffset = 0;
2194 u16 qcount = num_tx_queues;
2168 2195
2169 if (skb_rx_queue_recorded(skb)) { 2196 if (skb_rx_queue_recorded(skb)) {
2170 hash = skb_get_rx_queue(skb); 2197 hash = skb_get_rx_queue(skb);
@@ -2173,13 +2200,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2173 return hash; 2200 return hash;
2174 } 2201 }
2175 2202
2203 if (dev->num_tc) {
2204 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2205 qoffset = dev->tc_to_txq[tc].offset;
2206 qcount = dev->tc_to_txq[tc].count;
2207 }
2208
2176 if (skb->sk && skb->sk->sk_hash) 2209 if (skb->sk && skb->sk->sk_hash)
2177 hash = skb->sk->sk_hash; 2210 hash = skb->sk->sk_hash;
2178 else 2211 else
2179 hash = (__force u16) skb->protocol ^ skb->rxhash; 2212 hash = (__force u16) skb->protocol ^ skb->rxhash;
2180 hash = jhash_1word(hash, hashrnd); 2213 hash = jhash_1word(hash, hashrnd);
2181 2214
2182 return (u16) (((u64) hash * num_tx_queues) >> 32); 2215 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
2183} 2216}
2184EXPORT_SYMBOL(__skb_tx_hash); 2217EXPORT_SYMBOL(__skb_tx_hash);
2185 2218
@@ -2276,15 +2309,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2276 struct netdev_queue *txq) 2309 struct netdev_queue *txq)
2277{ 2310{
2278 spinlock_t *root_lock = qdisc_lock(q); 2311 spinlock_t *root_lock = qdisc_lock(q);
2279 bool contended = qdisc_is_running(q); 2312 bool contended;
2280 int rc; 2313 int rc;
2281 2314
2315 qdisc_skb_cb(skb)->pkt_len = skb->len;
2316 qdisc_calculate_pkt_len(skb, q);
2282 /* 2317 /*
2283 * Heuristic to force contended enqueues to serialize on a 2318 * Heuristic to force contended enqueues to serialize on a
2284 * separate lock before trying to get qdisc main lock. 2319 * separate lock before trying to get qdisc main lock.
2285 * This permits __QDISC_STATE_RUNNING owner to get the lock more often 2320 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2286 * and dequeue packets faster. 2321 * and dequeue packets faster.
2287 */ 2322 */
2323 contended = qdisc_is_running(q);
2288 if (unlikely(contended)) 2324 if (unlikely(contended))
2289 spin_lock(&q->busylock); 2325 spin_lock(&q->busylock);
2290 2326
@@ -2302,7 +2338,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2302 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) 2338 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2303 skb_dst_force(skb); 2339 skb_dst_force(skb);
2304 2340
2305 qdisc_skb_cb(skb)->pkt_len = skb->len;
2306 qdisc_bstats_update(q, skb); 2341 qdisc_bstats_update(q, skb);
2307 2342
2308 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { 2343 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2317,7 +2352,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2317 rc = NET_XMIT_SUCCESS; 2352 rc = NET_XMIT_SUCCESS;
2318 } else { 2353 } else {
2319 skb_dst_force(skb); 2354 skb_dst_force(skb);
2320 rc = qdisc_enqueue_root(skb, q); 2355 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2321 if (qdisc_run_begin(q)) { 2356 if (qdisc_run_begin(q)) {
2322 if (unlikely(contended)) { 2357 if (unlikely(contended)) {
2323 spin_unlock(&q->busylock); 2358 spin_unlock(&q->busylock);
@@ -2536,6 +2571,54 @@ EXPORT_SYMBOL(__skb_get_rxhash);
2536struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2571struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2537EXPORT_SYMBOL(rps_sock_flow_table); 2572EXPORT_SYMBOL(rps_sock_flow_table);
2538 2573
2574static struct rps_dev_flow *
2575set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2576 struct rps_dev_flow *rflow, u16 next_cpu)
2577{
2578 u16 tcpu;
2579
2580 tcpu = rflow->cpu = next_cpu;
2581 if (tcpu != RPS_NO_CPU) {
2582#ifdef CONFIG_RFS_ACCEL
2583 struct netdev_rx_queue *rxqueue;
2584 struct rps_dev_flow_table *flow_table;
2585 struct rps_dev_flow *old_rflow;
2586 u32 flow_id;
2587 u16 rxq_index;
2588 int rc;
2589
2590 /* Should we steer this flow to a different hardware queue? */
2591 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2592 !(dev->features & NETIF_F_NTUPLE))
2593 goto out;
2594 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2595 if (rxq_index == skb_get_rx_queue(skb))
2596 goto out;
2597
2598 rxqueue = dev->_rx + rxq_index;
2599 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2600 if (!flow_table)
2601 goto out;
2602 flow_id = skb->rxhash & flow_table->mask;
2603 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2604 rxq_index, flow_id);
2605 if (rc < 0)
2606 goto out;
2607 old_rflow = rflow;
2608 rflow = &flow_table->flows[flow_id];
2609 rflow->cpu = next_cpu;
2610 rflow->filter = rc;
2611 if (old_rflow->filter == rflow->filter)
2612 old_rflow->filter = RPS_NO_FILTER;
2613 out:
2614#endif
2615 rflow->last_qtail =
2616 per_cpu(softnet_data, tcpu).input_queue_head;
2617 }
2618
2619 return rflow;
2620}
2621
2539/* 2622/*
2540 * get_rps_cpu is called from netif_receive_skb and returns the target 2623 * get_rps_cpu is called from netif_receive_skb and returns the target
2541 * CPU from the RPS map of the receiving queue for a given skb. 2624 * CPU from the RPS map of the receiving queue for a given skb.
@@ -2607,12 +2690,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2607 if (unlikely(tcpu != next_cpu) && 2690 if (unlikely(tcpu != next_cpu) &&
2608 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || 2691 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2609 ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 2692 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2610 rflow->last_qtail)) >= 0)) { 2693 rflow->last_qtail)) >= 0))
2611 tcpu = rflow->cpu = next_cpu; 2694 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
2612 if (tcpu != RPS_NO_CPU) 2695
2613 rflow->last_qtail = per_cpu(softnet_data,
2614 tcpu).input_queue_head;
2615 }
2616 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { 2696 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2617 *rflowp = rflow; 2697 *rflowp = rflow;
2618 cpu = tcpu; 2698 cpu = tcpu;
@@ -2633,6 +2713,46 @@ done:
2633 return cpu; 2713 return cpu;
2634} 2714}
2635 2715
2716#ifdef CONFIG_RFS_ACCEL
2717
2718/**
2719 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
2720 * @dev: Device on which the filter was set
2721 * @rxq_index: RX queue index
2722 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
2723 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
2724 *
2725 * Drivers that implement ndo_rx_flow_steer() should periodically call
2726 * this function for each installed filter and remove the filters for
2727 * which it returns %true.
2728 */
2729bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2730 u32 flow_id, u16 filter_id)
2731{
2732 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2733 struct rps_dev_flow_table *flow_table;
2734 struct rps_dev_flow *rflow;
2735 bool expire = true;
2736 int cpu;
2737
2738 rcu_read_lock();
2739 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2740 if (flow_table && flow_id <= flow_table->mask) {
2741 rflow = &flow_table->flows[flow_id];
2742 cpu = ACCESS_ONCE(rflow->cpu);
2743 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2744 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2745 rflow->last_qtail) <
2746 (int)(10 * flow_table->mask)))
2747 expire = false;
2748 }
2749 rcu_read_unlock();
2750 return expire;
2751}
2752EXPORT_SYMBOL(rps_may_expire_flow);
2753
2754#endif /* CONFIG_RFS_ACCEL */
2755
2636/* Called from hardirq (IPI) context */ 2756/* Called from hardirq (IPI) context */
2637static void rps_trigger_softirq(void *data) 2757static void rps_trigger_softirq(void *data)
2638{ 2758{
@@ -2850,8 +2970,8 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2850 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 2970 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
2851 * a compare and 2 stores extra right now if we dont have it on 2971 * a compare and 2 stores extra right now if we dont have it on
2852 * but have CONFIG_NET_CLS_ACT 2972 * but have CONFIG_NET_CLS_ACT
2853 * NOTE: This doesnt stop any functionality; if you dont have 2973 * NOTE: This doesn't stop any functionality; if you dont have
2854 * the ingress scheduler, you just cant add policies on ingress. 2974 * the ingress scheduler, you just can't add policies on ingress.
2855 * 2975 *
2856 */ 2976 */
2857static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) 2977static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
@@ -2920,6 +3040,8 @@ out:
2920 * on a failure. 3040 * on a failure.
2921 * 3041 *
2922 * The caller must hold the rtnl_mutex. 3042 * The caller must hold the rtnl_mutex.
3043 *
3044 * For a general description of rx_handler, see enum rx_handler_result.
2923 */ 3045 */
2924int netdev_rx_handler_register(struct net_device *dev, 3046int netdev_rx_handler_register(struct net_device *dev,
2925 rx_handler_func_t *rx_handler, 3047 rx_handler_func_t *rx_handler,
@@ -2954,64 +3076,32 @@ void netdev_rx_handler_unregister(struct net_device *dev)
2954} 3076}
2955EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3077EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2956 3078
2957static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, 3079static void vlan_on_bond_hook(struct sk_buff *skb)
2958 struct net_device *master)
2959{
2960 if (skb->pkt_type == PACKET_HOST) {
2961 u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2962
2963 memcpy(dest, master->dev_addr, ETH_ALEN);
2964 }
2965}
2966
2967/* On bonding slaves other than the currently active slave, suppress
2968 * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2969 * ARP on active-backup slaves with arp_validate enabled.
2970 */
2971int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2972{ 3080{
2973 struct net_device *dev = skb->dev; 3081 /*
2974 3082 * Make sure ARP frames received on VLAN interfaces stacked on
2975 if (master->priv_flags & IFF_MASTER_ARPMON) 3083 * bonding interfaces still make their way to any base bonding
2976 dev->last_rx = jiffies; 3084 * device that may have registered for a specific ptype.
2977 3085 */
2978 if ((master->priv_flags & IFF_MASTER_ALB) && 3086 if (skb->dev->priv_flags & IFF_802_1Q_VLAN &&
2979 (master->priv_flags & IFF_BRIDGE_PORT)) { 3087 vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING &&
2980 /* Do address unmangle. The local destination address 3088 skb->protocol == htons(ETH_P_ARP)) {
2981 * will be always the one master has. Provides the right 3089 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2982 * functionality in a bridge.
2983 */
2984 skb_bond_set_mac_by_master(skb, master);
2985 }
2986
2987 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2988 if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2989 skb->protocol == __cpu_to_be16(ETH_P_ARP))
2990 return 0;
2991
2992 if (master->priv_flags & IFF_MASTER_ALB) {
2993 if (skb->pkt_type != PACKET_BROADCAST &&
2994 skb->pkt_type != PACKET_MULTICAST)
2995 return 0;
2996 }
2997 if (master->priv_flags & IFF_MASTER_8023AD &&
2998 skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2999 return 0;
3000 3090
3001 return 1; 3091 if (!skb2)
3092 return;
3093 skb2->dev = vlan_dev_real_dev(skb->dev);
3094 netif_rx(skb2);
3002 } 3095 }
3003 return 0;
3004} 3096}
3005EXPORT_SYMBOL(__skb_bond_should_drop);
3006 3097
3007static int __netif_receive_skb(struct sk_buff *skb) 3098static int __netif_receive_skb(struct sk_buff *skb)
3008{ 3099{
3009 struct packet_type *ptype, *pt_prev; 3100 struct packet_type *ptype, *pt_prev;
3010 rx_handler_func_t *rx_handler; 3101 rx_handler_func_t *rx_handler;
3011 struct net_device *orig_dev; 3102 struct net_device *orig_dev;
3012 struct net_device *master; 3103 struct net_device *null_or_dev;
3013 struct net_device *null_or_orig; 3104 bool deliver_exact = false;
3014 struct net_device *orig_or_bond;
3015 int ret = NET_RX_DROP; 3105 int ret = NET_RX_DROP;
3016 __be16 type; 3106 __be16 type;
3017 3107
@@ -3026,28 +3116,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
3026 3116
3027 if (!skb->skb_iif) 3117 if (!skb->skb_iif)
3028 skb->skb_iif = skb->dev->ifindex; 3118 skb->skb_iif = skb->dev->ifindex;
3029
3030 /*
3031 * bonding note: skbs received on inactive slaves should only
3032 * be delivered to pkt handlers that are exact matches. Also
3033 * the deliver_no_wcard flag will be set. If packet handlers
3034 * are sensitive to duplicate packets these skbs will need to
3035 * be dropped at the handler.
3036 */
3037 null_or_orig = NULL;
3038 orig_dev = skb->dev; 3119 orig_dev = skb->dev;
3039 master = ACCESS_ONCE(orig_dev->master);
3040 if (skb->deliver_no_wcard)
3041 null_or_orig = orig_dev;
3042 else if (master) {
3043 if (skb_bond_should_drop(skb, master)) {
3044 skb->deliver_no_wcard = 1;
3045 null_or_orig = orig_dev; /* deliver only exact match */
3046 } else
3047 skb->dev = master;
3048 }
3049 3120
3050 __this_cpu_inc(softnet_data.processed);
3051 skb_reset_network_header(skb); 3121 skb_reset_network_header(skb);
3052 skb_reset_transport_header(skb); 3122 skb_reset_transport_header(skb);
3053 skb->mac_len = skb->network_header - skb->mac_header; 3123 skb->mac_len = skb->network_header - skb->mac_header;
@@ -3056,6 +3126,10 @@ static int __netif_receive_skb(struct sk_buff *skb)
3056 3126
3057 rcu_read_lock(); 3127 rcu_read_lock();
3058 3128
3129another_round:
3130
3131 __this_cpu_inc(softnet_data.processed);
3132
3059#ifdef CONFIG_NET_CLS_ACT 3133#ifdef CONFIG_NET_CLS_ACT
3060 if (skb->tc_verd & TC_NCLS) { 3134 if (skb->tc_verd & TC_NCLS) {
3061 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 3135 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -3064,8 +3138,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
3064#endif 3138#endif
3065 3139
3066 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3140 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3067 if (ptype->dev == null_or_orig || ptype->dev == skb->dev || 3141 if (!ptype->dev || ptype->dev == skb->dev) {
3068 ptype->dev == orig_dev) {
3069 if (pt_prev) 3142 if (pt_prev)
3070 ret = deliver_skb(skb, pt_prev, orig_dev); 3143 ret = deliver_skb(skb, pt_prev, orig_dev);
3071 pt_prev = ptype; 3144 pt_prev = ptype;
@@ -3079,16 +3152,24 @@ static int __netif_receive_skb(struct sk_buff *skb)
3079ncls: 3152ncls:
3080#endif 3153#endif
3081 3154
3082 /* Handle special case of bridge or macvlan */
3083 rx_handler = rcu_dereference(skb->dev->rx_handler); 3155 rx_handler = rcu_dereference(skb->dev->rx_handler);
3084 if (rx_handler) { 3156 if (rx_handler) {
3085 if (pt_prev) { 3157 if (pt_prev) {
3086 ret = deliver_skb(skb, pt_prev, orig_dev); 3158 ret = deliver_skb(skb, pt_prev, orig_dev);
3087 pt_prev = NULL; 3159 pt_prev = NULL;
3088 } 3160 }
3089 skb = rx_handler(skb); 3161 switch (rx_handler(&skb)) {
3090 if (!skb) 3162 case RX_HANDLER_CONSUMED:
3091 goto out; 3163 goto out;
3164 case RX_HANDLER_ANOTHER:
3165 goto another_round;
3166 case RX_HANDLER_EXACT:
3167 deliver_exact = true;
3168 case RX_HANDLER_PASS:
3169 break;
3170 default:
3171 BUG();
3172 }
3092 } 3173 }
3093 3174
3094 if (vlan_tx_tag_present(skb)) { 3175 if (vlan_tx_tag_present(skb)) {
@@ -3103,24 +3184,17 @@ ncls:
3103 goto out; 3184 goto out;
3104 } 3185 }
3105 3186
3106 /* 3187 vlan_on_bond_hook(skb);
3107 * Make sure frames received on VLAN interfaces stacked on 3188
3108 * bonding interfaces still make their way to any base bonding 3189 /* deliver only exact match when indicated */
3109 * device that may have registered for a specific ptype. The 3190 null_or_dev = deliver_exact ? skb->dev : NULL;
3110 * handler may have to adjust skb->dev and orig_dev.
3111 */
3112 orig_or_bond = orig_dev;
3113 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
3114 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
3115 orig_or_bond = vlan_dev_real_dev(skb->dev);
3116 }
3117 3191
3118 type = skb->protocol; 3192 type = skb->protocol;
3119 list_for_each_entry_rcu(ptype, 3193 list_for_each_entry_rcu(ptype,
3120 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 3194 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3121 if (ptype->type == type && (ptype->dev == null_or_orig || 3195 if (ptype->type == type &&
3122 ptype->dev == skb->dev || ptype->dev == orig_dev || 3196 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3123 ptype->dev == orig_or_bond)) { 3197 ptype->dev == orig_dev)) {
3124 if (pt_prev) 3198 if (pt_prev)
3125 ret = deliver_skb(skb, pt_prev, orig_dev); 3199 ret = deliver_skb(skb, pt_prev, orig_dev);
3126 pt_prev = ptype; 3200 pt_prev = ptype;
@@ -3726,7 +3800,7 @@ static void net_rx_action(struct softirq_action *h)
3726 * with netpoll's poll_napi(). Only the entity which 3800 * with netpoll's poll_napi(). Only the entity which
3727 * obtains the lock and sees NAPI_STATE_SCHED set will 3801 * obtains the lock and sees NAPI_STATE_SCHED set will
3728 * actually make the ->poll() call. Therefore we avoid 3802 * actually make the ->poll() call. Therefore we avoid
3729 * accidently calling ->poll() when NAPI is not scheduled. 3803 * accidentally calling ->poll() when NAPI is not scheduled.
3730 */ 3804 */
3731 work = 0; 3805 work = 0;
3732 if (test_bit(NAPI_STATE_SCHED, &n->state)) { 3806 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
@@ -3917,12 +3991,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3917 3991
3918void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3992void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3919{ 3993{
3920 struct net_device *dev = (v == SEQ_START_TOKEN) ? 3994 struct net_device *dev = v;
3921 first_net_device(seq_file_net(seq)) : 3995
3922 next_net_device((struct net_device *)v); 3996 if (v == SEQ_START_TOKEN)
3997 dev = first_net_device_rcu(seq_file_net(seq));
3998 else
3999 dev = next_net_device_rcu(dev);
3923 4000
3924 ++*pos; 4001 ++*pos;
3925 return rcu_dereference(dev); 4002 return dev;
3926} 4003}
3927 4004
3928void dev_seq_stop(struct seq_file *seq, void *v) 4005void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4206,15 +4283,14 @@ static int __init dev_proc_init(void)
4206 4283
4207 4284
4208/** 4285/**
4209 * netdev_set_master - set up master/slave pair 4286 * netdev_set_master - set up master pointer
4210 * @slave: slave device 4287 * @slave: slave device
4211 * @master: new master device 4288 * @master: new master device
4212 * 4289 *
4213 * Changes the master device of the slave. Pass %NULL to break the 4290 * Changes the master device of the slave. Pass %NULL to break the
4214 * bonding. The caller must hold the RTNL semaphore. On a failure 4291 * bonding. The caller must hold the RTNL semaphore. On a failure
4215 * a negative errno code is returned. On success the reference counts 4292 * a negative errno code is returned. On success the reference counts
4216 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the 4293 * are adjusted and the function returns zero.
4217 * function returns zero.
4218 */ 4294 */
4219int netdev_set_master(struct net_device *slave, struct net_device *master) 4295int netdev_set_master(struct net_device *slave, struct net_device *master)
4220{ 4296{
@@ -4234,6 +4310,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
4234 synchronize_net(); 4310 synchronize_net();
4235 dev_put(old); 4311 dev_put(old);
4236 } 4312 }
4313 return 0;
4314}
4315EXPORT_SYMBOL(netdev_set_master);
4316
4317/**
4318 * netdev_set_bond_master - set up bonding master/slave pair
4319 * @slave: slave device
4320 * @master: new master device
4321 *
4322 * Changes the master device of the slave. Pass %NULL to break the
4323 * bonding. The caller must hold the RTNL semaphore. On a failure
4324 * a negative errno code is returned. On success %RTM_NEWLINK is sent
4325 * to the routing socket and the function returns zero.
4326 */
4327int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4328{
4329 int err;
4330
4331 ASSERT_RTNL();
4332
4333 err = netdev_set_master(slave, master);
4334 if (err)
4335 return err;
4237 if (master) 4336 if (master)
4238 slave->flags |= IFF_SLAVE; 4337 slave->flags |= IFF_SLAVE;
4239 else 4338 else
@@ -4242,7 +4341,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
4242 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); 4341 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4243 return 0; 4342 return 0;
4244} 4343}
4245EXPORT_SYMBOL(netdev_set_master); 4344EXPORT_SYMBOL(netdev_set_bond_master);
4246 4345
4247static void dev_change_rx_flags(struct net_device *dev, int flags) 4346static void dev_change_rx_flags(struct net_device *dev, int flags)
4248{ 4347{
@@ -4579,6 +4678,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
4579EXPORT_SYMBOL(dev_set_mtu); 4678EXPORT_SYMBOL(dev_set_mtu);
4580 4679
4581/** 4680/**
4681 * dev_set_group - Change group this device belongs to
4682 * @dev: device
4683 * @new_group: group this device should belong to
4684 */
4685void dev_set_group(struct net_device *dev, int new_group)
4686{
4687 dev->group = new_group;
4688}
4689EXPORT_SYMBOL(dev_set_group);
4690
4691/**
4582 * dev_set_mac_address - Change Media Access Control Address 4692 * dev_set_mac_address - Change Media Access Control Address
4583 * @dev: device 4693 * @dev: device
4584 * @sa: new address 4694 * @sa: new address
@@ -4663,7 +4773,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
4663 * is never reached 4773 * is never reached
4664 */ 4774 */
4665 WARN_ON(1); 4775 WARN_ON(1);
4666 err = -EINVAL; 4776 err = -ENOTTY;
4667 break; 4777 break;
4668 4778
4669 } 4779 }
@@ -4931,7 +5041,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4931 /* Set the per device memory buffer space. 5041 /* Set the per device memory buffer space.
4932 * Not applicable in our case */ 5042 * Not applicable in our case */
4933 case SIOCSIFLINK: 5043 case SIOCSIFLINK:
4934 return -EINVAL; 5044 return -ENOTTY;
4935 5045
4936 /* 5046 /*
4937 * Unknown or private ioctl. 5047 * Unknown or private ioctl.
@@ -4952,7 +5062,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4952 /* Take care of Wireless Extensions */ 5062 /* Take care of Wireless Extensions */
4953 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) 5063 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4954 return wext_handle_ioctl(net, &ifr, cmd, arg); 5064 return wext_handle_ioctl(net, &ifr, cmd, arg);
4955 return -EINVAL; 5065 return -ENOTTY;
4956 } 5066 }
4957} 5067}
4958 5068
@@ -5069,41 +5179,59 @@ static void rollback_registered(struct net_device *dev)
5069 list_del(&single); 5179 list_del(&single);
5070} 5180}
5071 5181
5072unsigned long netdev_fix_features(unsigned long features, const char *name) 5182u32 netdev_fix_features(struct net_device *dev, u32 features)
5073{ 5183{
5184 /* Fix illegal checksum combinations */
5185 if ((features & NETIF_F_HW_CSUM) &&
5186 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5187 netdev_info(dev, "mixed HW and IP checksum settings.\n");
5188 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5189 }
5190
5191 if ((features & NETIF_F_NO_CSUM) &&
5192 (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5193 netdev_info(dev, "mixed no checksumming and other settings.\n");
5194 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5195 }
5196
5074 /* Fix illegal SG+CSUM combinations. */ 5197 /* Fix illegal SG+CSUM combinations. */
5075 if ((features & NETIF_F_SG) && 5198 if ((features & NETIF_F_SG) &&
5076 !(features & NETIF_F_ALL_CSUM)) { 5199 !(features & NETIF_F_ALL_CSUM)) {
5077 if (name) 5200 netdev_info(dev,
5078 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " 5201 "Dropping NETIF_F_SG since no checksum feature.\n");
5079 "checksum feature.\n", name);
5080 features &= ~NETIF_F_SG; 5202 features &= ~NETIF_F_SG;
5081 } 5203 }
5082 5204
5083 /* TSO requires that SG is present as well. */ 5205 /* TSO requires that SG is present as well. */
5084 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { 5206 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5085 if (name) 5207 netdev_info(dev, "Dropping TSO features since no SG feature.\n");
5086 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " 5208 features &= ~NETIF_F_ALL_TSO;
5087 "SG feature.\n", name); 5209 }
5088 features &= ~NETIF_F_TSO; 5210
5211 /* TSO ECN requires that TSO is present as well. */
5212 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5213 features &= ~NETIF_F_TSO_ECN;
5214
5215 /* Software GSO depends on SG. */
5216 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5217 netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5218 features &= ~NETIF_F_GSO;
5089 } 5219 }
5090 5220
5221 /* UFO needs SG and checksumming */
5091 if (features & NETIF_F_UFO) { 5222 if (features & NETIF_F_UFO) {
5092 /* maybe split UFO into V4 and V6? */ 5223 /* maybe split UFO into V4 and V6? */
5093 if (!((features & NETIF_F_GEN_CSUM) || 5224 if (!((features & NETIF_F_GEN_CSUM) ||
5094 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 5225 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5095 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5226 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5096 if (name) 5227 netdev_info(dev,
5097 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 5228 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5098 "since no checksum offload features.\n",
5099 name);
5100 features &= ~NETIF_F_UFO; 5229 features &= ~NETIF_F_UFO;
5101 } 5230 }
5102 5231
5103 if (!(features & NETIF_F_SG)) { 5232 if (!(features & NETIF_F_SG)) {
5104 if (name) 5233 netdev_info(dev,
5105 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 5234 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5106 "since no NETIF_F_SG feature.\n", name);
5107 features &= ~NETIF_F_UFO; 5235 features &= ~NETIF_F_UFO;
5108 } 5236 }
5109 } 5237 }
@@ -5112,6 +5240,37 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
5112} 5240}
5113EXPORT_SYMBOL(netdev_fix_features); 5241EXPORT_SYMBOL(netdev_fix_features);
5114 5242
5243void netdev_update_features(struct net_device *dev)
5244{
5245 u32 features;
5246 int err = 0;
5247
5248 features = netdev_get_wanted_features(dev);
5249
5250 if (dev->netdev_ops->ndo_fix_features)
5251 features = dev->netdev_ops->ndo_fix_features(dev, features);
5252
5253 /* driver might be less strict about feature dependencies */
5254 features = netdev_fix_features(dev, features);
5255
5256 if (dev->features == features)
5257 return;
5258
5259 netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n",
5260 dev->features, features);
5261
5262 if (dev->netdev_ops->ndo_set_features)
5263 err = dev->netdev_ops->ndo_set_features(dev, features);
5264
5265 if (!err)
5266 dev->features = features;
5267 else if (err < 0)
5268 netdev_err(dev,
5269 "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
5270 err, features, dev->features);
5271}
5272EXPORT_SYMBOL(netdev_update_features);
5273
5115/** 5274/**
5116 * netif_stacked_transfer_operstate - transfer operstate 5275 * netif_stacked_transfer_operstate - transfer operstate
5117 * @rootdev: the root or lower level device to transfer state from 5276 * @rootdev: the root or lower level device to transfer state from
@@ -5246,27 +5405,19 @@ int register_netdevice(struct net_device *dev)
5246 if (dev->iflink == -1) 5405 if (dev->iflink == -1)
5247 dev->iflink = dev->ifindex; 5406 dev->iflink = dev->ifindex;
5248 5407
5249 /* Fix illegal checksum combinations */ 5408 /* Transfer changeable features to wanted_features and enable
5250 if ((dev->features & NETIF_F_HW_CSUM) && 5409 * software offloads (GSO and GRO).
5251 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5410 */
5252 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", 5411 dev->hw_features |= NETIF_F_SOFT_FEATURES;
5253 dev->name); 5412 dev->features |= NETIF_F_SOFT_FEATURES;
5254 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 5413 dev->wanted_features = dev->features & dev->hw_features;
5255 }
5256 5414
5257 if ((dev->features & NETIF_F_NO_CSUM) && 5415 /* Avoid warning from netdev_fix_features() for GSO without SG */
5258 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5416 if (!(dev->wanted_features & NETIF_F_SG)) {
5259 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", 5417 dev->wanted_features &= ~NETIF_F_GSO;
5260 dev->name); 5418 dev->features &= ~NETIF_F_GSO;
5261 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5262 } 5419 }
5263 5420
5264 dev->features = netdev_fix_features(dev->features, dev->name);
5265
5266 /* Enable software GSO if SG is supported. */
5267 if (dev->features & NETIF_F_SG)
5268 dev->features |= NETIF_F_GSO;
5269
5270 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, 5421 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5271 * vlan_dev_init() will do the dev->features check, so these features 5422 * vlan_dev_init() will do the dev->features check, so these features
5272 * are enabled only if supported by underlying device. 5423 * are enabled only if supported by underlying device.
@@ -5283,6 +5434,8 @@ int register_netdevice(struct net_device *dev)
5283 goto err_uninit; 5434 goto err_uninit;
5284 dev->reg_state = NETREG_REGISTERED; 5435 dev->reg_state = NETREG_REGISTERED;
5285 5436
5437 netdev_update_features(dev);
5438
5286 /* 5439 /*
5287 * Default initial state at registry is that the 5440 * Default initial state at registry is that the
5288 * device is present. 5441 * device is present.
@@ -5687,6 +5840,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5687#endif 5840#endif
5688 5841
5689 strcpy(dev->name, name); 5842 strcpy(dev->name, name);
5843 dev->group = INIT_NETDEV_GROUP;
5690 return dev; 5844 return dev;
5691 5845
5692free_all: 5846free_all:
@@ -6001,8 +6155,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
6001 * @one to the master device with current feature set @all. Will not 6155 * @one to the master device with current feature set @all. Will not
6002 * enable anything that is off in @mask. Returns the new feature set. 6156 * enable anything that is off in @mask. Returns the new feature set.
6003 */ 6157 */
6004unsigned long netdev_increment_features(unsigned long all, unsigned long one, 6158u32 netdev_increment_features(u32 all, u32 one, u32 mask)
6005 unsigned long mask)
6006{ 6159{
6007 /* If device needs checksumming, downgrade to it. */ 6160 /* If device needs checksumming, downgrade to it. */
6008 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) 6161 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
@@ -6187,7 +6340,7 @@ static void __net_exit default_device_exit(struct net *net)
6187 if (dev->rtnl_link_ops) 6340 if (dev->rtnl_link_ops)
6188 continue; 6341 continue;
6189 6342
6190 /* Push remaing network devices to init_net */ 6343 /* Push remaining network devices to init_net */
6191 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 6344 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6192 err = dev_change_net_namespace(dev, &init_net, fb_name); 6345 err = dev_change_net_namespace(dev, &init_net, fb_name);
6193 if (err) { 6346 if (err) {