Diffstat (limited to 'net/core')

-rw-r--r--   net/core/dev.c            |  38
-rw-r--r--   net/core/drop_monitor.c   | 102
-rw-r--r--   net/core/filter.c         |   4
-rw-r--r--   net/core/neighbour.c      |  14
-rw-r--r--   net/core/net_namespace.c  |   4
-rw-r--r--   net/core/netpoll.c        |  11
-rw-r--r--   net/core/netprio_cgroup.c |  78
-rw-r--r--   net/core/skbuff.c         |   4

8 files changed, 121 insertions, 134 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index cd0981977f5c..1cb0d8a6aa6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1136,8 +1136,8 @@ void dev_load(struct net *net, const char *name)
 	no_module = request_module("netdev-%s", name);
 	if (no_module && capable(CAP_SYS_MODULE)) {
 		if (!request_module("%s", name))
-			pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
+			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
 			       name);
 	}
 }
 EXPORT_SYMBOL(dev_load);
@@ -2089,25 +2089,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 	return 0;
 }
 
-/*
- * Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested or the sk-reference
- * is needed on driver level for other reasons, e.g. see net/can/raw.c
- */
-static inline void skb_orphan_try(struct sk_buff *skb)
-{
-	struct sock *sk = skb->sk;
-
-	if (sk && !skb_shinfo(skb)->tx_flags) {
-		/* skb_tx_hash() wont be able to get sk.
-		 * We copy sk_hash into skb->rxhash
-		 */
-		if (!skb->rxhash)
-			skb->rxhash = sk->sk_hash;
-		skb_orphan(skb);
-	}
-}
-
 static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
 {
 	return ((features & NETIF_F_GEN_CSUM) ||
@@ -2193,8 +2174,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);
 
-		skb_orphan_try(skb);
-
 		features = netif_skb_features(skb);
 
 		if (vlan_tx_tag_present(skb) &&
@@ -2304,7 +2283,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
-		hash = (__force u16) skb->protocol ^ skb->rxhash;
+		hash = (__force u16) skb->protocol;
 	hash = jhash_1word(hash, hashrnd);
 
 	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
@@ -2465,8 +2444,12 @@ static void skb_update_prio(struct sk_buff *skb)
 {
 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
 
-	if ((!skb->priority) && (skb->sk) && map)
-		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+	if (!skb->priority && skb->sk && map) {
+		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+
+		if (prioidx < map->priomap_len)
+			skb->priority = map->priomap[prioidx];
+	}
 }
 #else
 #define skb_update_prio(skb)
@@ -6300,7 +6283,8 @@ static struct hlist_head *netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
-	INIT_LIST_HEAD(&net->dev_base_head);
+	if (net != &init_net)
+		INIT_LIST_HEAD(&net->dev_base_head);
 
 	net->dev_name_head = netdev_create_hash();
 	if (net->dev_name_head == NULL)
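A note on the queue-selection arithmetic that the __skb_tx_hash hunk above touches: the last line maps a 32-bit hash onto the queue range with a multiply-and-shift rather than a modulo. A minimal userspace sketch of that arithmetic (the jhash_1word() step is stubbed out and the sample values are hypothetical, not kernel code):

/* Sketch only: how "((u64)hash * qcount) >> 32" maps a 32-bit hash
 * onto [qoffset, qoffset + qcount) without a modulo. */
#include <stdint.h>
#include <stdio.h>

static uint16_t pick_queue(uint32_t hash, uint16_t qcount, uint16_t qoffset)
{
	/* hash is treated as a fraction of 2^32; scaling by qcount and
	 * shifting right by 32 yields a value in [0, qcount). */
	return (uint16_t)(((uint64_t)hash * qcount) >> 32) + qoffset;
}

int main(void)
{
	uint32_t samples[] = { 0x00000000u, 0x40000000u, 0x80000000u,
			       0xC0000000u, 0xFFFFFFFFu };
	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("hash=0x%08x -> queue %u\n", samples[i],
		       pick_queue(samples[i], 8, 0));
	return 0;
}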
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ea5fb9fcc3f5..d23b6682f4e9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -36,9 +36,6 @@
 #define TRACE_ON 1
 #define TRACE_OFF 0
 
-static void send_dm_alert(struct work_struct *unused);
-
-
 /*
  * Globals, our netlink socket pointer
  * and the work handle that will send up
@@ -48,11 +45,10 @@ static int trace_state = TRACE_OFF;
 static DEFINE_MUTEX(trace_state_mutex);
 
 struct per_cpu_dm_data {
-	struct work_struct dm_alert_work;
-	struct sk_buff __rcu *skb;
-	atomic_t dm_hit_count;
-	struct timer_list send_timer;
-	int cpu;
+	spinlock_t lock;
+	struct sk_buff *skb;
+	struct work_struct dm_alert_work;
+	struct timer_list send_timer;
 };
 
 struct dm_hw_stat_delta {
@@ -78,13 +74,13 @@ static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
 
-static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
 	struct net_dm_alert_msg *msg;
 	struct nlattr *nla;
 	struct sk_buff *skb;
-	struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
+	unsigned long flags;
 
 	al = sizeof(struct net_dm_alert_msg);
 	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
@@ -99,65 +95,40 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 			  sizeof(struct net_dm_alert_msg));
 		msg = nla_data(nla);
 		memset(msg, 0, al);
-	} else
-		schedule_work_on(data->cpu, &data->dm_alert_work);
-
-	/*
-	 * Don't need to lock this, since we are guaranteed to only
-	 * run this on a single cpu at a time.
-	 * Note also that we only update data->skb if the old and new skb
-	 * pointers don't match. This ensures that we don't continually call
-	 * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
-	 */
-	if (skb != oskb) {
-		rcu_assign_pointer(data->skb, skb);
-
-		synchronize_rcu();
-
-		atomic_set(&data->dm_hit_count, dm_hit_limit);
+	} else {
+		mod_timer(&data->send_timer, jiffies + HZ / 10);
 	}
 
+	spin_lock_irqsave(&data->lock, flags);
+	swap(data->skb, skb);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	return skb;
 }
 
-static void send_dm_alert(struct work_struct *unused)
+static void send_dm_alert(struct work_struct *work)
 {
 	struct sk_buff *skb;
-	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+	struct per_cpu_dm_data *data;
 
-	WARN_ON_ONCE(data->cpu != smp_processor_id());
+	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
 
-	/*
-	 * Grab the skb we're about to send
-	 */
-	skb = rcu_dereference_protected(data->skb, 1);
-
-	/*
-	 * Replace it with a new one
-	 */
-	reset_per_cpu_data(data);
-
-	/*
-	 * Ship it!
-	 */
+	skb = reset_per_cpu_data(data);
+
 	if (skb)
 		genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
-
-	put_cpu_var(dm_cpu_data);
 }
 
 /*
  * This is the timer function to delay the sending of an alert
  * in the event that more drops will arrive during the
- * hysteresis period. Note that it operates under the timer interrupt
- * so we don't need to disable preemption here
+ * hysteresis period.
  */
-static void sched_send_work(unsigned long unused)
+static void sched_send_work(unsigned long _data)
 {
-	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
-
-	schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+	struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data;
 
-	put_cpu_var(dm_cpu_data);
+	schedule_work(&data->dm_alert_work);
 }
 
 static void trace_drop_common(struct sk_buff *skb, void *location)
@@ -167,33 +138,28 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	struct nlattr *nla;
 	int i;
 	struct sk_buff *dskb;
-	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
-
+	struct per_cpu_dm_data *data;
+	unsigned long flags;
 
-	rcu_read_lock();
-	dskb = rcu_dereference(data->skb);
+	local_irq_save(flags);
+	data = &__get_cpu_var(dm_cpu_data);
+	spin_lock(&data->lock);
+	dskb = data->skb;
 
 	if (!dskb)
 		goto out;
 
-	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
-		/*
-		 * we're already at zero, discard this hit
-		 */
-		goto out;
-	}
-
 	nlh = (struct nlmsghdr *)dskb->data;
 	nla = genlmsg_data(nlmsg_data(nlh));
 	msg = nla_data(nla);
 	for (i = 0; i < msg->entries; i++) {
 		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
 			msg->points[i].count++;
-			atomic_inc(&data->dm_hit_count);
 			goto out;
 		}
 	}
-
+	if (msg->entries == dm_hit_limit)
+		goto out;
 	/*
 	 * We need to create a new entry
 	 */
@@ -205,13 +171,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 
 	if (!timer_pending(&data->send_timer)) {
 		data->send_timer.expires = jiffies + dm_delay * HZ;
-		add_timer_on(&data->send_timer, smp_processor_id());
+		add_timer(&data->send_timer);
 	}
 
 out:
-	rcu_read_unlock();
-	put_cpu_var(dm_cpu_data);
-	return;
+	spin_unlock_irqrestore(&data->lock, flags);
 }
 
 static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
@@ -418,11 +382,11 @@ static int __init init_net_drop_monitor(void)
 
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
-		data->cpu = cpu;
 		INIT_WORK(&data->dm_alert_work, send_dm_alert);
 		init_timer(&data->send_timer);
-		data->send_timer.data = cpu;
+		data->send_timer.data = (unsigned long)data;
 		data->send_timer.function = sched_send_work;
+		spin_lock_init(&data->lock);
 		reset_per_cpu_data(data);
 	}
 
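The drop_monitor rework above replaces the RCU/atomic hit counting with a per-CPU buffer guarded by a spinlock: drop hits are recorded under the lock, and the work handler swaps the buffer out under the same lock before shipping it with no lock held. A rough userspace analog of that swap-under-lock pattern (pthread spinlock standing in for the kernel one; illustrative only, not the kernel code):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define HIT_LIMIT 64

struct alert_buf {
	int entries;
	void *points[HIT_LIMIT];
};

static pthread_spinlock_t lock;
static struct alert_buf *cur;		/* buffer currently accepting hits */

/* Hot path: record a drop location under the lock, bounded by HIT_LIMIT. */
static void record_hit(void *location)
{
	pthread_spin_lock(&lock);
	if (cur && cur->entries < HIT_LIMIT)
		cur->points[cur->entries++] = location;
	pthread_spin_unlock(&lock);
}

/* "Work" context: install a fresh buffer, return the full one so it can
 * be sent without the lock held (the kernel side may sleep there). */
static struct alert_buf *swap_out(void)
{
	struct alert_buf *fresh = calloc(1, sizeof(*fresh));
	struct alert_buf *old;

	pthread_spin_lock(&lock);
	old = cur;
	cur = fresh;			/* NULL if calloc failed; hits are then dropped */
	pthread_spin_unlock(&lock);
	return old;
}

int main(void)
{
	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	cur = calloc(1, sizeof(*cur));

	record_hit((void *)0x1234);
	record_hit((void *)0x5678);

	struct alert_buf *done = swap_out();
	printf("shipping %d entries\n", done ? done->entries : 0);
	free(done);
	free(cur);
	return 0;
}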
diff --git a/net/core/filter.c b/net/core/filter.c
index a3eddb515d1b..d4ce2dc712e3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -616,9 +616,9 @@ static int __sk_prepare_filter(struct sk_filter *fp)
 /**
  *	sk_unattached_filter_create - create an unattached filter
  *	@fprog: the filter program
- *	@sk: the socket to use
+ *	@pfp: the unattached filter that is created
  *
- * Create a filter independent ofr any socket. We first run some
+ * Create a filter independent of any socket. We first run some
  * sanity checks on it to make sure it does not explode on us later.
  * If an error occurs or there is insufficient memory for the filter
  * a negative errno code is returned. On success the return is zero.
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index eb09f8bbbf07..d81d026138f0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2219,9 +2219,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 	rcu_read_lock_bh();
 	nht = rcu_dereference_bh(tbl->nht);
 
-	for (h = 0; h < (1 << nht->hash_shift); h++) {
-		if (h < s_h)
-			continue;
+	for (h = s_h; h < (1 << nht->hash_shift); h++) {
 		if (h > s_h)
 			s_idx = 0;
 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
@@ -2260,9 +2258,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 
 	read_lock_bh(&tbl->lock);
 
-	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
-		if (h < s_h)
-			continue;
+	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
 		if (h > s_h)
 			s_idx = 0;
 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
@@ -2297,7 +2293,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 	struct neigh_table *tbl;
 	int t, family, s_t;
 	int proxy = 0;
-	int err = 0;
+	int err;
 
 	read_lock(&neigh_tbl_lock);
 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
@@ -2311,7 +2307,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 
 	s_t = cb->args[0];
 
-	for (tbl = neigh_tables, t = 0; tbl && (err >= 0);
+	for (tbl = neigh_tables, t = 0; tbl;
 	     tbl = tbl->next, t++) {
 		if (t < s_t || (family && tbl->family != family))
 			continue;
@@ -2322,6 +2318,8 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 			err = pneigh_dump_table(tbl, skb, cb);
 		else
 			err = neigh_dump_table(tbl, skb, cb);
+		if (err < 0)
+			break;
 	}
 	read_unlock(&neigh_tbl_lock);
 
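The neighbour dump changes start each table walk at the saved bucket s_h and stop as soon as a dump callback fails, so the position stored in cb->args stays valid for the next netlink dump call. A small standalone sketch of that resume idiom (toy data and a fake "budget", not the kernel structures):

#include <stdio.h>

#define NBUCKETS 4

static const char *buckets[NBUCKETS][3] = {
	{ "a0", "a1", NULL }, { "b0", NULL }, { "c0", "c1", NULL }, { "d0", NULL },
};

/* Emit entries until 'budget' runs out; on a full buffer, record where to
 * resume (s_h/s_idx) and return an error, mirroring the dump-table loops. */
static int dump(int *s_h, int *s_idx, int budget)
{
	for (int h = *s_h; h < NBUCKETS; h++) {
		int idx = 0;
		if (h > *s_h)
			*s_idx = 0;	/* new bucket: restart inner index */
		for (const char **e = buckets[h]; *e; e++, idx++) {
			if (idx < *s_idx)
				continue;	/* already emitted last time */
			if (budget-- == 0) {
				*s_h = h;
				*s_idx = idx;
				return -1;	/* "buffer full" */
			}
			printf("emit %s\n", *e);
		}
	}
	return 0;
}

int main(void)
{
	int s_h = 0, s_idx = 0;
	while (dump(&s_h, &s_idx, 2) < 0)
		printf("-- resume at bucket %d, idx %d --\n", s_h, s_idx);
	return 0;
}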
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index dddbacb8f28c..42f1e1c7514f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex);
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
 
-struct net init_net;
+struct net init_net = {
+	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+};
 EXPORT_SYMBOL(init_net);
 
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3d84fb9d8873..f9f40b932e4b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -362,22 +362,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev);
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 {
-	int total_len, eth_len, ip_len, udp_len;
+	int total_len, ip_len, udp_len;
 	struct sk_buff *skb;
 	struct udphdr *udph;
 	struct iphdr *iph;
 	struct ethhdr *eth;
 
 	udp_len = len + sizeof(*udph);
-	ip_len = eth_len = udp_len + sizeof(*iph);
-	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
+	ip_len = udp_len + sizeof(*iph);
+	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
 
-	skb = find_skb(np, total_len, total_len - len);
+	skb = find_skb(np, total_len + np->dev->needed_tailroom,
+		       total_len - len);
 	if (!skb)
 		return;
 
 	skb_copy_to_linear_data(skb, msg, len);
-	skb->len += len;
+	skb_put(skb, len);
 
 	skb_push(skb, sizeof(*udph));
 	skb_reset_transport_header(skb);
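The netpoll sizing change above takes the headroom from LL_RESERVED_SPACE() and the tailroom from dev->needed_tailroom instead of a bare ETH_HLEN + NET_IP_ALIGN. A back-of-envelope sketch of the resulting numbers, using assumed device values (the 16-byte headroom and 4-byte tailroom are illustrative, not taken from any real driver):

#include <stdio.h>

int main(void)
{
	int len = 100;			/* UDP payload */
	int udph = 8, iph = 20;		/* UDP and IPv4 header sizes */
	int ll_reserved_space = 16;	/* assumed: device link-layer headroom */
	int needed_tailroom = 4;	/* assumed: device-specific trailer space */

	int udp_len   = len + udph;
	int ip_len    = udp_len + iph;
	int total_len = ip_len + ll_reserved_space;

	/* mirrors find_skb(np, total_len + needed_tailroom, total_len - len) */
	printf("allocate %d bytes, reserve %d bytes of headroom\n",
	       total_len + needed_tailroom, total_len - len);
	return 0;
}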
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 5b8aa2fae48b..b2e9caa1ad1a 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -49,8 +49,9 @@ static int get_prioidx(u32 *prio)
 		return -ENOSPC;
 	}
 	set_bit(prioidx, prioidx_map);
+	if (atomic_read(&max_prioidx) < prioidx)
+		atomic_set(&max_prioidx, prioidx);
 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
-	atomic_set(&max_prioidx, prioidx);
 	*prio = prioidx;
 	return 0;
 }
@@ -64,7 +65,7 @@ static void put_prioidx(u32 idx)
 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
 }
 
-static void extend_netdev_table(struct net_device *dev, u32 new_len)
+static int extend_netdev_table(struct net_device *dev, u32 new_len)
 {
 	size_t new_size = sizeof(struct netprio_map) +
 			   ((sizeof(u32) * new_len));
@@ -76,7 +77,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
 
 	if (!new_priomap) {
 		pr_warn("Unable to alloc new priomap!\n");
-		return;
+		return -ENOMEM;
 	}
 
 	for (i = 0;
@@ -89,46 +90,79 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
 	rcu_assign_pointer(dev->priomap, new_priomap);
 	if (old_priomap)
 		kfree_rcu(old_priomap, rcu);
+	return 0;
 }
 
-static void update_netdev_tables(void)
+static int write_update_netdev_table(struct net_device *dev)
 {
+	int ret = 0;
+	u32 max_len;
+	struct netprio_map *map;
+
+	rtnl_lock();
+	max_len = atomic_read(&max_prioidx) + 1;
+	map = rtnl_dereference(dev->priomap);
+	if (!map || map->priomap_len < max_len)
+		ret = extend_netdev_table(dev, max_len);
+	rtnl_unlock();
+
+	return ret;
+}
+
+static int update_netdev_tables(void)
+{
+	int ret = 0;
 	struct net_device *dev;
-	u32 max_len = atomic_read(&max_prioidx) + 1;
+	u32 max_len;
 	struct netprio_map *map;
 
 	rtnl_lock();
+	max_len = atomic_read(&max_prioidx) + 1;
 	for_each_netdev(&init_net, dev) {
 		map = rtnl_dereference(dev->priomap);
-		if ((!map) ||
-		    (map->priomap_len < max_len))
-			extend_netdev_table(dev, max_len);
+		/*
+		 * don't allocate priomap if we didn't
+		 * change net_prio.ifpriomap (map == NULL),
+		 * this will speed up skb_update_prio.
+		 */
+		if (map && map->priomap_len < max_len) {
+			ret = extend_netdev_table(dev, max_len);
+			if (ret < 0)
+				break;
+		}
 	}
 	rtnl_unlock();
+	return ret;
 }
 
 static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
 {
 	struct cgroup_netprio_state *cs;
-	int ret;
+	int ret = -EINVAL;
 
 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
 		return ERR_PTR(-ENOMEM);
 
-	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
-		kfree(cs);
-		return ERR_PTR(-EINVAL);
-	}
+	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
+		goto out;
 
 	ret = get_prioidx(&cs->prioidx);
-	if (ret != 0) {
+	if (ret < 0) {
 		pr_warn("No space in priority index array\n");
-		kfree(cs);
-		return ERR_PTR(ret);
+		goto out;
+	}
+
+	ret = update_netdev_tables();
+	if (ret < 0) {
+		put_prioidx(cs->prioidx);
+		goto out;
 	}
 
 	return &cs->css;
+out:
+	kfree(cs);
+	return ERR_PTR(ret);
 }
 
 static void cgrp_destroy(struct cgroup *cgrp)
@@ -141,7 +175,7 @@ static void cgrp_destroy(struct cgroup *cgrp)
 	rtnl_lock();
 	for_each_netdev(&init_net, dev) {
 		map = rtnl_dereference(dev->priomap);
-		if (map)
+		if (map && cs->prioidx < map->priomap_len)
 			map->priomap[cs->prioidx] = 0;
 	}
 	rtnl_unlock();
@@ -165,7 +199,7 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
 		map = rcu_dereference(dev->priomap);
-		priority = map ? map->priomap[prioidx] : 0;
+		priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
 		cb->fill(cb, dev->name, priority);
 	}
 	rcu_read_unlock();
@@ -220,13 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 	if (!dev)
 		goto out_free_devname;
 
-	update_netdev_tables();
-	ret = 0;
+	ret = write_update_netdev_table(dev);
+	if (ret < 0)
+		goto out_put_dev;
+
 	rcu_read_lock();
 	map = rcu_dereference(dev->priomap);
 	if (map)
 		map->priomap[prioidx] = priority;
 	rcu_read_unlock();
+
+out_put_dev:
 	dev_put(dev);
 
 out_free_devname:
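The netprio changes above make table extension fallible and bounds-check every priomap access against priomap_len. A userspace analog of the grow-then-bounds-check pattern (plain pointer assignment and free() standing in for rcu_assign_pointer()/kfree_rcu(); illustrative only, not the kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct prio_map {
	unsigned int len;
	unsigned int prio[];		/* flexible array, like netprio_map */
};

/* Grow the table to new_len entries, copying the old contents over. */
static int extend_table(struct prio_map **mapp, unsigned int new_len)
{
	struct prio_map *old = *mapp, *new_map;

	new_map = calloc(1, sizeof(*new_map) + new_len * sizeof(unsigned int));
	if (!new_map)
		return -1;
	new_map->len = new_len;
	if (old)
		memcpy(new_map->prio, old->prio, old->len * sizeof(unsigned int));
	*mapp = new_map;		/* kernel: rcu_assign_pointer() */
	free(old);			/* kernel: kfree_rcu() */
	return 0;
}

/* Mirrors the new skb_update_prio()/read_priomap() checks. */
static unsigned int lookup(const struct prio_map *map, unsigned int idx)
{
	return (map && idx < map->len) ? map->prio[idx] : 0;
}

int main(void)
{
	struct prio_map *map = NULL;

	if (extend_table(&map, 4) == 0)
		map->prio[3] = 7;
	printf("idx 3 -> %u, idx 9 -> %u\n", lookup(map, 3), lookup(map, 9));
	free(map);
	return 0;
}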
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bac3c5756d63..d124306b81fd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -353,7 +353,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 	unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-	if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) {
+	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
 		void *data = netdev_alloc_frag(fragsz);
 
 		if (likely(data)) {
@@ -3362,7 +3362,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
  * @to: prior buffer
  * @from: buffer to add
  * @fragstolen: pointer to boolean
- *
+ * @delta_truesize: how much more was allocated than was requested
  */
 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		      bool *fragstolen, int *delta_truesize)
