aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Julian Anastasov <ja@ssi.bg> 2012-07-18 06:15:35 -0400
committer: David S. Miller <davem@davemloft.net> 2012-07-19 13:30:14 -0400
commit: aee06da6726d4981c51928c2d6d1e2cabeec7a10 (patch)
tree: d7b0281232aac81edfd5da0ca96c6b99ac9c7dae
parent: 7fed84f622ec2696087301199c2952b85e0cc3b4 (diff)
ipv4: use seqlock for nh_exceptions
Use global seqlock for the nh_exceptions. Call fnhe_oldest with the right hash chain. Correct the diff value for dst_set_expires.

v2: after suggestions from Eric Dumazet:
* get rid of spin lock fnhe_lock, rearrange update_or_create_fnhe
* continue daddr search in rt_bind_exception

v3:
* remove the daddr check before seqlock in rt_bind_exception
* restart lookup in rt_bind_exception on detected seqlock change, as suggested by David Miller

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip_fib.h 2
-rw-r--r-- net/ipv4/route.c 118
2 files changed, 69 insertions, 51 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e9ee1ca07087..2daf096dfc60 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -51,7 +51,7 @@ struct fib_nh_exception {
51 struct fib_nh_exception __rcu *fnhe_next; 51 struct fib_nh_exception __rcu *fnhe_next;
52 __be32 fnhe_daddr; 52 __be32 fnhe_daddr;
53 u32 fnhe_pmtu; 53 u32 fnhe_pmtu;
54 u32 fnhe_gw; 54 __be32 fnhe_gw;
55 unsigned long fnhe_expires; 55 unsigned long fnhe_expires;
56 unsigned long fnhe_stamp; 56 unsigned long fnhe_stamp;
57}; 57};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2c25581bf25c..89e39dc5336b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1333,9 +1333,9 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
1333 build_sk_flow_key(fl4, sk); 1333 build_sk_flow_key(fl4, sk);
1334} 1334}
1335 1335
1336static DEFINE_SPINLOCK(fnhe_lock); 1336static DEFINE_SEQLOCK(fnhe_seqlock);
1337 1337
1338static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr) 1338static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
1339{ 1339{
1340 struct fib_nh_exception *fnhe, *oldest; 1340 struct fib_nh_exception *fnhe, *oldest;
1341 1341
@@ -1358,47 +1358,63 @@ static inline u32 fnhe_hashfun(__be32 daddr)
1358 return hval & (FNHE_HASH_SIZE - 1); 1358 return hval & (FNHE_HASH_SIZE - 1);
1359} 1359}
1360 1360
1361static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr) 1361static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
1362 u32 pmtu, unsigned long expires)
1362{ 1363{
1363 struct fnhe_hash_bucket *hash = nh->nh_exceptions; 1364 struct fnhe_hash_bucket *hash;
1364 struct fib_nh_exception *fnhe; 1365 struct fib_nh_exception *fnhe;
1365 int depth; 1366 int depth;
1366 u32 hval; 1367 u32 hval = fnhe_hashfun(daddr);
1368
1369 write_seqlock_bh(&fnhe_seqlock);
1367 1370
1371 hash = nh->nh_exceptions;
1368 if (!hash) { 1372 if (!hash) {
1369 hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), 1373 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
1370 GFP_ATOMIC);
1371 if (!hash) 1374 if (!hash)
1372 return NULL; 1375 goto out_unlock;
1376 nh->nh_exceptions = hash;
1373 } 1377 }
1374 1378
1375 hval = fnhe_hashfun(daddr);
1376 hash += hval; 1379 hash += hval;
1377 1380
1378 depth = 0; 1381 depth = 0;
1379 for (fnhe = rcu_dereference(hash->chain); fnhe; 1382 for (fnhe = rcu_dereference(hash->chain); fnhe;
1380 fnhe = rcu_dereference(fnhe->fnhe_next)) { 1383 fnhe = rcu_dereference(fnhe->fnhe_next)) {
1381 if (fnhe->fnhe_daddr == daddr) 1384 if (fnhe->fnhe_daddr == daddr)
1382 goto out; 1385 break;
1383 depth++; 1386 depth++;
1384 } 1387 }
1385 1388
1386 if (depth > FNHE_RECLAIM_DEPTH) { 1389 if (fnhe) {
1387 fnhe = fnhe_oldest(hash + hval, daddr); 1390 if (gw)
1388 goto out_daddr; 1391 fnhe->fnhe_gw = gw;
1392 if (pmtu) {
1393 fnhe->fnhe_pmtu = pmtu;
1394 fnhe->fnhe_expires = expires;
1395 }
1396 } else {
1397 if (depth > FNHE_RECLAIM_DEPTH)
1398 fnhe = fnhe_oldest(hash);
1399 else {
1400 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
1401 if (!fnhe)
1402 goto out_unlock;
1403
1404 fnhe->fnhe_next = hash->chain;
1405 rcu_assign_pointer(hash->chain, fnhe);
1406 }
1407 fnhe->fnhe_daddr = daddr;
1408 fnhe->fnhe_gw = gw;
1409 fnhe->fnhe_pmtu = pmtu;
1410 fnhe->fnhe_expires = expires;
1389 } 1411 }
1390 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
1391 if (!fnhe)
1392 return NULL;
1393
1394 fnhe->fnhe_next = hash->chain;
1395 rcu_assign_pointer(hash->chain, fnhe);
1396 1412
1397out_daddr:
1398 fnhe->fnhe_daddr = daddr;
1399out:
1400 fnhe->fnhe_stamp = jiffies; 1413 fnhe->fnhe_stamp = jiffies;
1401 return fnhe; 1414
1415out_unlock:
1416 write_sequnlock_bh(&fnhe_seqlock);
1417 return;
1402} 1418}
1403 1419
1404static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) 1420static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4)
@@ -1452,13 +1468,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
1452 } else { 1468 } else {
1453 if (fib_lookup(net, fl4, &res) == 0) { 1469 if (fib_lookup(net, fl4, &res) == 0) {
1454 struct fib_nh *nh = &FIB_RES_NH(res); 1470 struct fib_nh *nh = &FIB_RES_NH(res);
1455 struct fib_nh_exception *fnhe;
1456 1471
1457 spin_lock_bh(&fnhe_lock); 1472 update_or_create_fnhe(nh, fl4->daddr, new_gw,
1458 fnhe = find_or_create_fnhe(nh, fl4->daddr); 1473 0, 0);
1459 if (fnhe)
1460 fnhe->fnhe_gw = new_gw;
1461 spin_unlock_bh(&fnhe_lock);
1462 } 1474 }
1463 rt->rt_gateway = new_gw; 1475 rt->rt_gateway = new_gw;
1464 rt->rt_flags |= RTCF_REDIRECTED; 1476 rt->rt_flags |= RTCF_REDIRECTED;
@@ -1663,15 +1675,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
1663 1675
1664 if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { 1676 if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
1665 struct fib_nh *nh = &FIB_RES_NH(res); 1677 struct fib_nh *nh = &FIB_RES_NH(res);
1666 struct fib_nh_exception *fnhe;
1667 1678
1668 spin_lock_bh(&fnhe_lock); 1679 update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
1669 fnhe = find_or_create_fnhe(nh, fl4->daddr); 1680 jiffies + ip_rt_mtu_expires);
1670 if (fnhe) {
1671 fnhe->fnhe_pmtu = mtu;
1672 fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires;
1673 }
1674 spin_unlock_bh(&fnhe_lock);
1675 } 1681 }
1676 rt->rt_pmtu = mtu; 1682 rt->rt_pmtu = mtu;
1677 dst_set_expires(&rt->dst, ip_rt_mtu_expires); 1683 dst_set_expires(&rt->dst, ip_rt_mtu_expires);
@@ -1902,23 +1908,35 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr
1902 1908
1903 hval = fnhe_hashfun(daddr); 1909 hval = fnhe_hashfun(daddr);
1904 1910
1911restart:
1905 for (fnhe = rcu_dereference(hash[hval].chain); fnhe; 1912 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1906 fnhe = rcu_dereference(fnhe->fnhe_next)) { 1913 fnhe = rcu_dereference(fnhe->fnhe_next)) {
1907 if (fnhe->fnhe_daddr == daddr) { 1914 __be32 fnhe_daddr, gw;
1908 if (fnhe->fnhe_pmtu) { 1915 unsigned long expires;
1909 unsigned long expires = fnhe->fnhe_expires; 1916 unsigned int seq;
1910 unsigned long diff = jiffies - expires; 1917 u32 pmtu;
1911 1918
1912 if (time_before(jiffies, expires)) { 1919 seq = read_seqbegin(&fnhe_seqlock);
1913 rt->rt_pmtu = fnhe->fnhe_pmtu; 1920 fnhe_daddr = fnhe->fnhe_daddr;
1914 dst_set_expires(&rt->dst, diff); 1921 gw = fnhe->fnhe_gw;
1915 } 1922 pmtu = fnhe->fnhe_pmtu;
1923 expires = fnhe->fnhe_expires;
1924 if (read_seqretry(&fnhe_seqlock, seq))
1925 goto restart;
1926 if (daddr != fnhe_daddr)
1927 continue;
1928 if (pmtu) {
1929 unsigned long diff = expires - jiffies;
1930
1931 if (time_before(jiffies, expires)) {
1932 rt->rt_pmtu = pmtu;
1933 dst_set_expires(&rt->dst, diff);
1916 } 1934 }
1917 if (fnhe->fnhe_gw)
1918 rt->rt_gateway = fnhe->fnhe_gw;
1919 fnhe->fnhe_stamp = jiffies;
1920 break;
1921 } 1935 }
1936 if (gw)
1937 rt->rt_gateway = gw;
1938 fnhe->fnhe_stamp = jiffies;
1939 break;
1922 } 1940 }
1923} 1941}
1924 1942