diff options
author | Julian Anastasov <ja@ssi.bg> | 2012-07-18 06:15:35 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-07-19 13:30:14 -0400 |
commit | aee06da6726d4981c51928c2d6d1e2cabeec7a10 (patch) | |
tree | d7b0281232aac81edfd5da0ca96c6b99ac9c7dae | |
parent | 7fed84f622ec2696087301199c2952b85e0cc3b4 (diff) |
ipv4: use seqlock for nh_exceptions
Use global seqlock for the nh_exceptions. Call
fnhe_oldest with the right hash chain. Correct the diff
value for dst_set_expires.
v2: after suggestions from Eric Dumazet:
* get rid of spin lock fnhe_lock, rearrange update_or_create_fnhe
* continue daddr search in rt_bind_exception
v3:
* remove the daddr check before seqlock in rt_bind_exception
* restart lookup in rt_bind_exception on detected seqlock change,
as suggested by David Miller
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ip_fib.h | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 118 |
2 files changed, 69 insertions, 51 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e9ee1ca07087..2daf096dfc60 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h | |||
@@ -51,7 +51,7 @@ struct fib_nh_exception { | |||
51 | struct fib_nh_exception __rcu *fnhe_next; | 51 | struct fib_nh_exception __rcu *fnhe_next; |
52 | __be32 fnhe_daddr; | 52 | __be32 fnhe_daddr; |
53 | u32 fnhe_pmtu; | 53 | u32 fnhe_pmtu; |
54 | u32 fnhe_gw; | 54 | __be32 fnhe_gw; |
55 | unsigned long fnhe_expires; | 55 | unsigned long fnhe_expires; |
56 | unsigned long fnhe_stamp; | 56 | unsigned long fnhe_stamp; |
57 | }; | 57 | }; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2c25581bf25c..89e39dc5336b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1333,9 +1333,9 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, | |||
1333 | build_sk_flow_key(fl4, sk); | 1333 | build_sk_flow_key(fl4, sk); |
1334 | } | 1334 | } |
1335 | 1335 | ||
1336 | static DEFINE_SPINLOCK(fnhe_lock); | 1336 | static DEFINE_SEQLOCK(fnhe_seqlock); |
1337 | 1337 | ||
1338 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr) | 1338 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) |
1339 | { | 1339 | { |
1340 | struct fib_nh_exception *fnhe, *oldest; | 1340 | struct fib_nh_exception *fnhe, *oldest; |
1341 | 1341 | ||
@@ -1358,47 +1358,63 @@ static inline u32 fnhe_hashfun(__be32 daddr) | |||
1358 | return hval & (FNHE_HASH_SIZE - 1); | 1358 | return hval & (FNHE_HASH_SIZE - 1); |
1359 | } | 1359 | } |
1360 | 1360 | ||
1361 | static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr) | 1361 | static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, |
1362 | u32 pmtu, unsigned long expires) | ||
1362 | { | 1363 | { |
1363 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; | 1364 | struct fnhe_hash_bucket *hash; |
1364 | struct fib_nh_exception *fnhe; | 1365 | struct fib_nh_exception *fnhe; |
1365 | int depth; | 1366 | int depth; |
1366 | u32 hval; | 1367 | u32 hval = fnhe_hashfun(daddr); |
1368 | |||
1369 | write_seqlock_bh(&fnhe_seqlock); | ||
1367 | 1370 | ||
1371 | hash = nh->nh_exceptions; | ||
1368 | if (!hash) { | 1372 | if (!hash) { |
1369 | hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), | 1373 | hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC); |
1370 | GFP_ATOMIC); | ||
1371 | if (!hash) | 1374 | if (!hash) |
1372 | return NULL; | 1375 | goto out_unlock; |
1376 | nh->nh_exceptions = hash; | ||
1373 | } | 1377 | } |
1374 | 1378 | ||
1375 | hval = fnhe_hashfun(daddr); | ||
1376 | hash += hval; | 1379 | hash += hval; |
1377 | 1380 | ||
1378 | depth = 0; | 1381 | depth = 0; |
1379 | for (fnhe = rcu_dereference(hash->chain); fnhe; | 1382 | for (fnhe = rcu_dereference(hash->chain); fnhe; |
1380 | fnhe = rcu_dereference(fnhe->fnhe_next)) { | 1383 | fnhe = rcu_dereference(fnhe->fnhe_next)) { |
1381 | if (fnhe->fnhe_daddr == daddr) | 1384 | if (fnhe->fnhe_daddr == daddr) |
1382 | goto out; | 1385 | break; |
1383 | depth++; | 1386 | depth++; |
1384 | } | 1387 | } |
1385 | 1388 | ||
1386 | if (depth > FNHE_RECLAIM_DEPTH) { | 1389 | if (fnhe) { |
1387 | fnhe = fnhe_oldest(hash + hval, daddr); | 1390 | if (gw) |
1388 | goto out_daddr; | 1391 | fnhe->fnhe_gw = gw; |
1392 | if (pmtu) { | ||
1393 | fnhe->fnhe_pmtu = pmtu; | ||
1394 | fnhe->fnhe_expires = expires; | ||
1395 | } | ||
1396 | } else { | ||
1397 | if (depth > FNHE_RECLAIM_DEPTH) | ||
1398 | fnhe = fnhe_oldest(hash); | ||
1399 | else { | ||
1400 | fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); | ||
1401 | if (!fnhe) | ||
1402 | goto out_unlock; | ||
1403 | |||
1404 | fnhe->fnhe_next = hash->chain; | ||
1405 | rcu_assign_pointer(hash->chain, fnhe); | ||
1406 | } | ||
1407 | fnhe->fnhe_daddr = daddr; | ||
1408 | fnhe->fnhe_gw = gw; | ||
1409 | fnhe->fnhe_pmtu = pmtu; | ||
1410 | fnhe->fnhe_expires = expires; | ||
1389 | } | 1411 | } |
1390 | fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); | ||
1391 | if (!fnhe) | ||
1392 | return NULL; | ||
1393 | |||
1394 | fnhe->fnhe_next = hash->chain; | ||
1395 | rcu_assign_pointer(hash->chain, fnhe); | ||
1396 | 1412 | ||
1397 | out_daddr: | ||
1398 | fnhe->fnhe_daddr = daddr; | ||
1399 | out: | ||
1400 | fnhe->fnhe_stamp = jiffies; | 1413 | fnhe->fnhe_stamp = jiffies; |
1401 | return fnhe; | 1414 | |
1415 | out_unlock: | ||
1416 | write_sequnlock_bh(&fnhe_seqlock); | ||
1417 | return; | ||
1402 | } | 1418 | } |
1403 | 1419 | ||
1404 | static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) | 1420 | static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) |
@@ -1452,13 +1468,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow | |||
1452 | } else { | 1468 | } else { |
1453 | if (fib_lookup(net, fl4, &res) == 0) { | 1469 | if (fib_lookup(net, fl4, &res) == 0) { |
1454 | struct fib_nh *nh = &FIB_RES_NH(res); | 1470 | struct fib_nh *nh = &FIB_RES_NH(res); |
1455 | struct fib_nh_exception *fnhe; | ||
1456 | 1471 | ||
1457 | spin_lock_bh(&fnhe_lock); | 1472 | update_or_create_fnhe(nh, fl4->daddr, new_gw, |
1458 | fnhe = find_or_create_fnhe(nh, fl4->daddr); | 1473 | 0, 0); |
1459 | if (fnhe) | ||
1460 | fnhe->fnhe_gw = new_gw; | ||
1461 | spin_unlock_bh(&fnhe_lock); | ||
1462 | } | 1474 | } |
1463 | rt->rt_gateway = new_gw; | 1475 | rt->rt_gateway = new_gw; |
1464 | rt->rt_flags |= RTCF_REDIRECTED; | 1476 | rt->rt_flags |= RTCF_REDIRECTED; |
@@ -1663,15 +1675,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | |||
1663 | 1675 | ||
1664 | if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { | 1676 | if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { |
1665 | struct fib_nh *nh = &FIB_RES_NH(res); | 1677 | struct fib_nh *nh = &FIB_RES_NH(res); |
1666 | struct fib_nh_exception *fnhe; | ||
1667 | 1678 | ||
1668 | spin_lock_bh(&fnhe_lock); | 1679 | update_or_create_fnhe(nh, fl4->daddr, 0, mtu, |
1669 | fnhe = find_or_create_fnhe(nh, fl4->daddr); | 1680 | jiffies + ip_rt_mtu_expires); |
1670 | if (fnhe) { | ||
1671 | fnhe->fnhe_pmtu = mtu; | ||
1672 | fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires; | ||
1673 | } | ||
1674 | spin_unlock_bh(&fnhe_lock); | ||
1675 | } | 1681 | } |
1676 | rt->rt_pmtu = mtu; | 1682 | rt->rt_pmtu = mtu; |
1677 | dst_set_expires(&rt->dst, ip_rt_mtu_expires); | 1683 | dst_set_expires(&rt->dst, ip_rt_mtu_expires); |
@@ -1902,23 +1908,35 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr | |||
1902 | 1908 | ||
1903 | hval = fnhe_hashfun(daddr); | 1909 | hval = fnhe_hashfun(daddr); |
1904 | 1910 | ||
1911 | restart: | ||
1905 | for (fnhe = rcu_dereference(hash[hval].chain); fnhe; | 1912 | for (fnhe = rcu_dereference(hash[hval].chain); fnhe; |
1906 | fnhe = rcu_dereference(fnhe->fnhe_next)) { | 1913 | fnhe = rcu_dereference(fnhe->fnhe_next)) { |
1907 | if (fnhe->fnhe_daddr == daddr) { | 1914 | __be32 fnhe_daddr, gw; |
1908 | if (fnhe->fnhe_pmtu) { | 1915 | unsigned long expires; |
1909 | unsigned long expires = fnhe->fnhe_expires; | 1916 | unsigned int seq; |
1910 | unsigned long diff = jiffies - expires; | 1917 | u32 pmtu; |
1911 | 1918 | ||
1912 | if (time_before(jiffies, expires)) { | 1919 | seq = read_seqbegin(&fnhe_seqlock); |
1913 | rt->rt_pmtu = fnhe->fnhe_pmtu; | 1920 | fnhe_daddr = fnhe->fnhe_daddr; |
1914 | dst_set_expires(&rt->dst, diff); | 1921 | gw = fnhe->fnhe_gw; |
1915 | } | 1922 | pmtu = fnhe->fnhe_pmtu; |
1923 | expires = fnhe->fnhe_expires; | ||
1924 | if (read_seqretry(&fnhe_seqlock, seq)) | ||
1925 | goto restart; | ||
1926 | if (daddr != fnhe_daddr) | ||
1927 | continue; | ||
1928 | if (pmtu) { | ||
1929 | unsigned long diff = expires - jiffies; | |
1930 | |||
1931 | if (time_before(jiffies, expires)) { | ||
1932 | rt->rt_pmtu = pmtu; | ||
1933 | dst_set_expires(&rt->dst, diff); | ||
1916 | } | 1934 | } |
1917 | if (fnhe->fnhe_gw) | ||
1918 | rt->rt_gateway = fnhe->fnhe_gw; | ||
1919 | fnhe->fnhe_stamp = jiffies; | ||
1920 | break; | ||
1921 | } | 1935 | } |
1936 | if (gw) | ||
1937 | rt->rt_gateway = gw; | ||
1938 | fnhe->fnhe_stamp = jiffies; | ||
1939 | break; | ||
1922 | } | 1940 | } |
1923 | } | 1941 | } |
1924 | 1942 | ||