diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-07 06:44:07 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-10-11 15:54:04 -0400 |
commit | 0ed8ddf4045fcfcac36bad753dc4046118c603ec (patch) | |
tree | cf1d9eb14668c4d2257b3519ed7deec8c5cb396d | |
parent | d122179a3c0fdc71b88cb9e3605f372b1651a9ff (diff) |
neigh: Protect neigh->ha[] with a seqlock
Add a seqlock in struct neighbour to protect neigh->ha[], and avoid
dirtying neighbour in stress situations (many different flows / dsts).
Dirtying takes place because of read_lock(&n->lock) and n->used writes.
Switching to a seqlock, and writing n->used only when jiffies has changed,
results in less dirtying.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/neighbour.h | 16 | ||||
-rw-r--r-- | net/core/neighbour.c | 47 | ||||
-rw-r--r-- | net/ipv4/arp.c | 6 | ||||
-rw-r--r-- | net/sched/sch_teql.c | 8 |
4 files changed, 51 insertions, 26 deletions
diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a4538d553704..f04e7a2522c5 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h | |||
@@ -105,6 +105,7 @@ struct neighbour { | |||
105 | atomic_t refcnt; | 105 | atomic_t refcnt; |
106 | atomic_t probes; | 106 | atomic_t probes; |
107 | rwlock_t lock; | 107 | rwlock_t lock; |
108 | seqlock_t ha_lock; | ||
108 | unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; | 109 | unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; |
109 | struct hh_cache *hh; | 110 | struct hh_cache *hh; |
110 | int (*output)(struct sk_buff *skb); | 111 | int (*output)(struct sk_buff *skb); |
@@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh) | |||
302 | 303 | ||
303 | static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) | 304 | static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) |
304 | { | 305 | { |
305 | neigh->used = jiffies; | 306 | unsigned long now = ACCESS_ONCE(jiffies); |
307 | |||
308 | if (neigh->used != now) | ||
309 | neigh->used = now; | ||
306 | if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) | 310 | if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) |
307 | return __neigh_event_send(neigh, skb); | 311 | return __neigh_event_send(neigh, skb); |
308 | return 0; | 312 | return 0; |
@@ -373,4 +377,14 @@ struct neighbour_cb { | |||
373 | 377 | ||
374 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) | 378 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) |
375 | 379 | ||
380 | static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, | ||
381 | const struct net_device *dev) | ||
382 | { | ||
383 | unsigned int seq; | ||
384 | |||
385 | do { | ||
386 | seq = read_seqbegin(&n->ha_lock); | ||
387 | memcpy(dst, n->ha, dev->addr_len); | ||
388 | } while (read_seqretry(&n->ha_lock, seq)); | ||
389 | } | ||
376 | #endif | 390 | #endif |
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2044906ecd1a..b165b96355bf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -294,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) | |||
294 | 294 | ||
295 | skb_queue_head_init(&n->arp_queue); | 295 | skb_queue_head_init(&n->arp_queue); |
296 | rwlock_init(&n->lock); | 296 | rwlock_init(&n->lock); |
297 | seqlock_init(&n->ha_lock); | ||
297 | n->updated = n->used = now; | 298 | n->updated = n->used = now; |
298 | n->nud_state = NUD_NONE; | 299 | n->nud_state = NUD_NONE; |
299 | n->output = neigh_blackhole; | 300 | n->output = neigh_blackhole; |
@@ -1015,7 +1016,7 @@ out_unlock_bh: | |||
1015 | } | 1016 | } |
1016 | EXPORT_SYMBOL(__neigh_event_send); | 1017 | EXPORT_SYMBOL(__neigh_event_send); |
1017 | 1018 | ||
1018 | static void neigh_update_hhs(struct neighbour *neigh) | 1019 | static void neigh_update_hhs(const struct neighbour *neigh) |
1019 | { | 1020 | { |
1020 | struct hh_cache *hh; | 1021 | struct hh_cache *hh; |
1021 | void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) | 1022 | void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) |
@@ -1151,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |||
1151 | } | 1152 | } |
1152 | 1153 | ||
1153 | if (lladdr != neigh->ha) { | 1154 | if (lladdr != neigh->ha) { |
1155 | write_seqlock(&neigh->ha_lock); | ||
1154 | memcpy(&neigh->ha, lladdr, dev->addr_len); | 1156 | memcpy(&neigh->ha, lladdr, dev->addr_len); |
1157 | write_sequnlock(&neigh->ha_lock); | ||
1155 | neigh_update_hhs(neigh); | 1158 | neigh_update_hhs(neigh); |
1156 | if (!(new & NUD_CONNECTED)) | 1159 | if (!(new & NUD_CONNECTED)) |
1157 | neigh->confirmed = jiffies - | 1160 | neigh->confirmed = jiffies - |
@@ -1214,6 +1217,7 @@ static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst, | |||
1214 | { | 1217 | { |
1215 | struct hh_cache *hh; | 1218 | struct hh_cache *hh; |
1216 | 1219 | ||
1220 | smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */ | ||
1217 | for (hh = n->hh; hh; hh = hh->hh_next) { | 1221 | for (hh = n->hh; hh; hh = hh->hh_next) { |
1218 | if (hh->hh_type == protocol) { | 1222 | if (hh->hh_type == protocol) { |
1219 | atomic_inc(&hh->hh_refcnt); | 1223 | atomic_inc(&hh->hh_refcnt); |
@@ -1248,8 +1252,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, | |||
1248 | kfree(hh); | 1252 | kfree(hh); |
1249 | return; | 1253 | return; |
1250 | } | 1254 | } |
1251 | read_unlock(&n->lock); | 1255 | |
1252 | write_lock(&n->lock); | 1256 | write_lock_bh(&n->lock); |
1253 | 1257 | ||
1254 | /* must check if another thread already did the insert */ | 1258 | /* must check if another thread already did the insert */ |
1255 | if (neigh_hh_lookup(n, dst, protocol)) { | 1259 | if (neigh_hh_lookup(n, dst, protocol)) { |
@@ -1263,13 +1267,13 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, | |||
1263 | hh->hh_output = n->ops->output; | 1267 | hh->hh_output = n->ops->output; |
1264 | 1268 | ||
1265 | hh->hh_next = n->hh; | 1269 | hh->hh_next = n->hh; |
1270 | smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */ | ||
1266 | n->hh = hh; | 1271 | n->hh = hh; |
1267 | 1272 | ||
1268 | if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) | 1273 | if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) |
1269 | hh_cache_put(hh); | 1274 | hh_cache_put(hh); |
1270 | end: | 1275 | end: |
1271 | write_unlock(&n->lock); | 1276 | write_unlock_bh(&n->lock); |
1272 | read_lock(&n->lock); | ||
1273 | } | 1277 | } |
1274 | 1278 | ||
1275 | /* This function can be used in contexts, where only old dev_queue_xmit | 1279 | /* This function can be used in contexts, where only old dev_queue_xmit |
@@ -1308,16 +1312,18 @@ int neigh_resolve_output(struct sk_buff *skb) | |||
1308 | if (!neigh_event_send(neigh, skb)) { | 1312 | if (!neigh_event_send(neigh, skb)) { |
1309 | int err; | 1313 | int err; |
1310 | struct net_device *dev = neigh->dev; | 1314 | struct net_device *dev = neigh->dev; |
1315 | unsigned int seq; | ||
1311 | 1316 | ||
1312 | read_lock_bh(&neigh->lock); | ||
1313 | if (dev->header_ops->cache && | 1317 | if (dev->header_ops->cache && |
1314 | !dst->hh && | 1318 | !dst->hh && |
1315 | !(dst->flags & DST_NOCACHE)) | 1319 | !(dst->flags & DST_NOCACHE)) |
1316 | neigh_hh_init(neigh, dst, dst->ops->protocol); | 1320 | neigh_hh_init(neigh, dst, dst->ops->protocol); |
1317 | 1321 | ||
1318 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), | 1322 | do { |
1319 | neigh->ha, NULL, skb->len); | 1323 | seq = read_seqbegin(&neigh->ha_lock); |
1320 | read_unlock_bh(&neigh->lock); | 1324 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), |
1325 | neigh->ha, NULL, skb->len); | ||
1326 | } while (read_seqretry(&neigh->ha_lock, seq)); | ||
1321 | 1327 | ||
1322 | if (err >= 0) | 1328 | if (err >= 0) |
1323 | rc = neigh->ops->queue_xmit(skb); | 1329 | rc = neigh->ops->queue_xmit(skb); |
@@ -1344,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb) | |||
1344 | struct dst_entry *dst = skb_dst(skb); | 1350 | struct dst_entry *dst = skb_dst(skb); |
1345 | struct neighbour *neigh = dst->neighbour; | 1351 | struct neighbour *neigh = dst->neighbour; |
1346 | struct net_device *dev = neigh->dev; | 1352 | struct net_device *dev = neigh->dev; |
1353 | unsigned int seq; | ||
1347 | 1354 | ||
1348 | __skb_pull(skb, skb_network_offset(skb)); | 1355 | __skb_pull(skb, skb_network_offset(skb)); |
1349 | 1356 | ||
1350 | read_lock_bh(&neigh->lock); | 1357 | do { |
1351 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), | 1358 | seq = read_seqbegin(&neigh->ha_lock); |
1352 | neigh->ha, NULL, skb->len); | 1359 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), |
1353 | read_unlock_bh(&neigh->lock); | 1360 | neigh->ha, NULL, skb->len); |
1361 | } while (read_seqretry(&neigh->ha_lock, seq)); | ||
1362 | |||
1354 | if (err >= 0) | 1363 | if (err >= 0) |
1355 | err = neigh->ops->queue_xmit(skb); | 1364 | err = neigh->ops->queue_xmit(skb); |
1356 | else { | 1365 | else { |
@@ -2148,10 +2157,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, | |||
2148 | 2157 | ||
2149 | read_lock_bh(&neigh->lock); | 2158 | read_lock_bh(&neigh->lock); |
2150 | ndm->ndm_state = neigh->nud_state; | 2159 | ndm->ndm_state = neigh->nud_state; |
2151 | if ((neigh->nud_state & NUD_VALID) && | 2160 | if (neigh->nud_state & NUD_VALID) { |
2152 | nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { | 2161 | char haddr[MAX_ADDR_LEN]; |
2153 | read_unlock_bh(&neigh->lock); | 2162 | |
2154 | goto nla_put_failure; | 2163 | neigh_ha_snapshot(haddr, neigh, neigh->dev); |
2164 | if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { | ||
2165 | read_unlock_bh(&neigh->lock); | ||
2166 | goto nla_put_failure; | ||
2167 | } | ||
2155 | } | 2168 | } |
2156 | 2169 | ||
2157 | ci.ndm_used = jiffies_to_clock_t(now - neigh->used); | 2170 | ci.ndm_used = jiffies_to_clock_t(now - neigh->used); |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f35309578170..d8e540c5b071 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -502,10 +502,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) | |||
502 | 502 | ||
503 | if (n) { | 503 | if (n) { |
504 | n->used = jiffies; | 504 | n->used = jiffies; |
505 | if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) { | 505 | if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) { |
506 | read_lock_bh(&n->lock); | 506 | neigh_ha_snapshot(haddr, n, dev); |
507 | memcpy(haddr, n->ha, dev->addr_len); | ||
508 | read_unlock_bh(&n->lock); | ||
509 | neigh_release(n); | 507 | neigh_release(n); |
510 | return 0; | 508 | return 0; |
511 | } | 509 | } |
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index feaabc103ce6..401af9596709 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c | |||
@@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * | |||
241 | } | 241 | } |
242 | if (neigh_event_send(n, skb_res) == 0) { | 242 | if (neigh_event_send(n, skb_res) == 0) { |
243 | int err; | 243 | int err; |
244 | char haddr[MAX_ADDR_LEN]; | ||
244 | 245 | ||
245 | read_lock(&n->lock); | 246 | neigh_ha_snapshot(haddr, n, dev); |
246 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), | 247 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, |
247 | n->ha, NULL, skb->len); | 248 | NULL, skb->len); |
248 | read_unlock(&n->lock); | ||
249 | 249 | ||
250 | if (err < 0) { | 250 | if (err < 0) { |
251 | neigh_release(n); | 251 | neigh_release(n); |