aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2010-10-07 06:44:07 -0400
committer David S. Miller <davem@davemloft.net> 2010-10-11 15:54:04 -0400
commit 0ed8ddf4045fcfcac36bad753dc4046118c603ec (patch)
tree cf1d9eb14668c4d2257b3519ed7deec8c5cb396d
parent d122179a3c0fdc71b88cb9e3605f372b1651a9ff (diff)
neigh: Protect neigh->ha[] with a seqlock
Add a seqlock in struct neighbour to protect neigh->ha[], and avoid dirtying neighbour in stress situation (many different flows / dsts).

Dirtying takes place because of read_lock(&n->lock) and n->used writes.

Switching to a seqlock, and writing n->used only on jiffies changes permits less dirtying.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/neighbour.h 16
-rw-r--r-- net/core/neighbour.c 47
-rw-r--r-- net/ipv4/arp.c 6
-rw-r--r-- net/sched/sch_teql.c 8
4 files changed, 51 insertions, 26 deletions
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index a4538d553704..f04e7a2522c5 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -105,6 +105,7 @@ struct neighbour {
105 atomic_t refcnt; 105 atomic_t refcnt;
106 atomic_t probes; 106 atomic_t probes;
107 rwlock_t lock; 107 rwlock_t lock;
108 seqlock_t ha_lock;
108 unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; 109 unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
109 struct hh_cache *hh; 110 struct hh_cache *hh;
110 int (*output)(struct sk_buff *skb); 111 int (*output)(struct sk_buff *skb);
@@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh)
302 303
303static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) 304static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
304{ 305{
305 neigh->used = jiffies; 306 unsigned long now = ACCESS_ONCE(jiffies);
307
308 if (neigh->used != now)
309 neigh->used = now;
306 if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) 310 if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
307 return __neigh_event_send(neigh, skb); 311 return __neigh_event_send(neigh, skb);
308 return 0; 312 return 0;
@@ -373,4 +377,14 @@ struct neighbour_cb {
373 377
374#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) 378#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
375 379
380static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
381 const struct net_device *dev)
382{
383 unsigned int seq;
384
385 do {
386 seq = read_seqbegin(&n->ha_lock);
387 memcpy(dst, n->ha, dev->addr_len);
388 } while (read_seqretry(&n->ha_lock, seq));
389}
376#endif 390#endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2044906ecd1a..b165b96355bf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -294,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
294 294
295 skb_queue_head_init(&n->arp_queue); 295 skb_queue_head_init(&n->arp_queue);
296 rwlock_init(&n->lock); 296 rwlock_init(&n->lock);
297 seqlock_init(&n->ha_lock);
297 n->updated = n->used = now; 298 n->updated = n->used = now;
298 n->nud_state = NUD_NONE; 299 n->nud_state = NUD_NONE;
299 n->output = neigh_blackhole; 300 n->output = neigh_blackhole;
@@ -1015,7 +1016,7 @@ out_unlock_bh:
1015} 1016}
1016EXPORT_SYMBOL(__neigh_event_send); 1017EXPORT_SYMBOL(__neigh_event_send);
1017 1018
1018static void neigh_update_hhs(struct neighbour *neigh) 1019static void neigh_update_hhs(const struct neighbour *neigh)
1019{ 1020{
1020 struct hh_cache *hh; 1021 struct hh_cache *hh;
1021 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) 1022 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1151,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1151 } 1152 }
1152 1153
1153 if (lladdr != neigh->ha) { 1154 if (lladdr != neigh->ha) {
1155 write_seqlock(&neigh->ha_lock);
1154 memcpy(&neigh->ha, lladdr, dev->addr_len); 1156 memcpy(&neigh->ha, lladdr, dev->addr_len);
1157 write_sequnlock(&neigh->ha_lock);
1155 neigh_update_hhs(neigh); 1158 neigh_update_hhs(neigh);
1156 if (!(new & NUD_CONNECTED)) 1159 if (!(new & NUD_CONNECTED))
1157 neigh->confirmed = jiffies - 1160 neigh->confirmed = jiffies -
@@ -1214,6 +1217,7 @@ static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
1214{ 1217{
1215 struct hh_cache *hh; 1218 struct hh_cache *hh;
1216 1219
1220 smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
1217 for (hh = n->hh; hh; hh = hh->hh_next) { 1221 for (hh = n->hh; hh; hh = hh->hh_next) {
1218 if (hh->hh_type == protocol) { 1222 if (hh->hh_type == protocol) {
1219 atomic_inc(&hh->hh_refcnt); 1223 atomic_inc(&hh->hh_refcnt);
@@ -1248,8 +1252,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1248 kfree(hh); 1252 kfree(hh);
1249 return; 1253 return;
1250 } 1254 }
1251 read_unlock(&n->lock); 1255
1252 write_lock(&n->lock); 1256 write_lock_bh(&n->lock);
1253 1257
1254 /* must check if another thread already did the insert */ 1258 /* must check if another thread already did the insert */
1255 if (neigh_hh_lookup(n, dst, protocol)) { 1259 if (neigh_hh_lookup(n, dst, protocol)) {
@@ -1263,13 +1267,13 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1263 hh->hh_output = n->ops->output; 1267 hh->hh_output = n->ops->output;
1264 1268
1265 hh->hh_next = n->hh; 1269 hh->hh_next = n->hh;
1270 smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
1266 n->hh = hh; 1271 n->hh = hh;
1267 1272
1268 if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) 1273 if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1269 hh_cache_put(hh); 1274 hh_cache_put(hh);
1270end: 1275end:
1271 write_unlock(&n->lock); 1276 write_unlock_bh(&n->lock);
1272 read_lock(&n->lock);
1273} 1277}
1274 1278
1275/* This function can be used in contexts, where only old dev_queue_xmit 1279/* This function can be used in contexts, where only old dev_queue_xmit
@@ -1308,16 +1312,18 @@ int neigh_resolve_output(struct sk_buff *skb)
1308 if (!neigh_event_send(neigh, skb)) { 1312 if (!neigh_event_send(neigh, skb)) {
1309 int err; 1313 int err;
1310 struct net_device *dev = neigh->dev; 1314 struct net_device *dev = neigh->dev;
1315 unsigned int seq;
1311 1316
1312 read_lock_bh(&neigh->lock);
1313 if (dev->header_ops->cache && 1317 if (dev->header_ops->cache &&
1314 !dst->hh && 1318 !dst->hh &&
1315 !(dst->flags & DST_NOCACHE)) 1319 !(dst->flags & DST_NOCACHE))
1316 neigh_hh_init(neigh, dst, dst->ops->protocol); 1320 neigh_hh_init(neigh, dst, dst->ops->protocol);
1317 1321
1318 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1322 do {
1319 neigh->ha, NULL, skb->len); 1323 seq = read_seqbegin(&neigh->ha_lock);
1320 read_unlock_bh(&neigh->lock); 1324 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1325 neigh->ha, NULL, skb->len);
1326 } while (read_seqretry(&neigh->ha_lock, seq));
1321 1327
1322 if (err >= 0) 1328 if (err >= 0)
1323 rc = neigh->ops->queue_xmit(skb); 1329 rc = neigh->ops->queue_xmit(skb);
@@ -1344,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
1344 struct dst_entry *dst = skb_dst(skb); 1350 struct dst_entry *dst = skb_dst(skb);
1345 struct neighbour *neigh = dst->neighbour; 1351 struct neighbour *neigh = dst->neighbour;
1346 struct net_device *dev = neigh->dev; 1352 struct net_device *dev = neigh->dev;
1353 unsigned int seq;
1347 1354
1348 __skb_pull(skb, skb_network_offset(skb)); 1355 __skb_pull(skb, skb_network_offset(skb));
1349 1356
1350 read_lock_bh(&neigh->lock); 1357 do {
1351 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1358 seq = read_seqbegin(&neigh->ha_lock);
1352 neigh->ha, NULL, skb->len); 1359 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1353 read_unlock_bh(&neigh->lock); 1360 neigh->ha, NULL, skb->len);
1361 } while (read_seqretry(&neigh->ha_lock, seq));
1362
1354 if (err >= 0) 1363 if (err >= 0)
1355 err = neigh->ops->queue_xmit(skb); 1364 err = neigh->ops->queue_xmit(skb);
1356 else { 1365 else {
@@ -2148,10 +2157,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2148 2157
2149 read_lock_bh(&neigh->lock); 2158 read_lock_bh(&neigh->lock);
2150 ndm->ndm_state = neigh->nud_state; 2159 ndm->ndm_state = neigh->nud_state;
2151 if ((neigh->nud_state & NUD_VALID) && 2160 if (neigh->nud_state & NUD_VALID) {
2152 nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { 2161 char haddr[MAX_ADDR_LEN];
2153 read_unlock_bh(&neigh->lock); 2162
2154 goto nla_put_failure; 2163 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2164 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2165 read_unlock_bh(&neigh->lock);
2166 goto nla_put_failure;
2167 }
2155 } 2168 }
2156 2169
2157 ci.ndm_used = jiffies_to_clock_t(now - neigh->used); 2170 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f35309578170..d8e540c5b071 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -502,10 +502,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
502 502
503 if (n) { 503 if (n) {
504 n->used = jiffies; 504 n->used = jiffies;
505 if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) { 505 if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
506 read_lock_bh(&n->lock); 506 neigh_ha_snapshot(haddr, n, dev);
507 memcpy(haddr, n->ha, dev->addr_len);
508 read_unlock_bh(&n->lock);
509 neigh_release(n); 507 neigh_release(n);
510 return 0; 508 return 0;
511 } 509 }
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index feaabc103ce6..401af9596709 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
241 } 241 }
242 if (neigh_event_send(n, skb_res) == 0) { 242 if (neigh_event_send(n, skb_res) == 0) {
243 int err; 243 int err;
244 char haddr[MAX_ADDR_LEN];
244 245
245 read_lock(&n->lock); 246 neigh_ha_snapshot(haddr, n, dev);
246 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 247 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
247 n->ha, NULL, skb->len); 248 NULL, skb->len);
248 read_unlock(&n->lock);
249 249
250 if (err < 0) { 250 if (err < 0) {
251 neigh_release(n); 251 neigh_release(n);