diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2005-07-05 17:55:24 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-07-05 17:55:24 -0400 |
commit | 22c047ccbc68fa8f3fa57f0e8f906479a062c426 (patch) | |
tree | 351735ba39f845a67847d72f84087df724c95046 /net/ipv4/route.c | |
parent | f0e36f8cee8101604378085171c980d9cc71d779 (diff) |
[NET]: Hashed spinlocks in net/ipv4/route.c
- Locking abstraction
- Spinlocks moved out of rt hash table: less memory (50%) used by the rt
hash table. It's a win even on UP.
- Sizing of spinlocks table depends on NR_CPUS
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 66 |
1 files changed, 47 insertions, 19 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 12a1cf306f67..daf82f8d3c4a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -54,6 +54,7 @@ | |||
54 | * Marc Boucher : routing by fwmark | 54 | * Marc Boucher : routing by fwmark |
55 | * Robert Olsson : Added rt_cache statistics | 55 | * Robert Olsson : Added rt_cache statistics |
56 | * Arnaldo C. Melo : Convert proc stuff to seq_file | 56 | * Arnaldo C. Melo : Convert proc stuff to seq_file |
57 | * Eric Dumazet : hashed spinlocks | ||
57 | * | 58 | * |
58 | * This program is free software; you can redistribute it and/or | 59 | * This program is free software; you can redistribute it and/or |
59 | * modify it under the terms of the GNU General Public License | 60 | * modify it under the terms of the GNU General Public License |
@@ -201,8 +202,37 @@ __u8 ip_tos2prio[16] = { | |||
201 | 202 | ||
202 | struct rt_hash_bucket { | 203 | struct rt_hash_bucket { |
203 | struct rtable *chain; | 204 | struct rtable *chain; |
204 | spinlock_t lock; | 205 | }; |
205 | } __attribute__((__aligned__(8))); | 206 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) |
207 | /* | ||
208 | * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks. | |
209 | * The size of this table is a power of two and depends on the number of CPUs. | |
210 | */ | ||
211 | #if NR_CPUS >= 32 | ||
212 | #define RT_HASH_LOCK_SZ 4096 | ||
213 | #elif NR_CPUS >= 16 | ||
214 | #define RT_HASH_LOCK_SZ 2048 | ||
215 | #elif NR_CPUS >= 8 | ||
216 | #define RT_HASH_LOCK_SZ 1024 | ||
217 | #elif NR_CPUS >= 4 | ||
218 | #define RT_HASH_LOCK_SZ 512 | ||
219 | #else | ||
220 | #define RT_HASH_LOCK_SZ 256 | ||
221 | #endif | ||
222 | |||
223 | static spinlock_t *rt_hash_locks; | ||
224 | # define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)] | ||
225 | # define rt_hash_lock_init() { \ | ||
226 | int i; \ | ||
227 | rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \ | ||
228 | if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \ | ||
229 | for (i = 0; i < RT_HASH_LOCK_SZ; i++) \ | ||
230 | spin_lock_init(&rt_hash_locks[i]); \ | ||
231 | } | ||
232 | #else | ||
233 | # define rt_hash_lock_addr(slot) NULL | ||
234 | # define rt_hash_lock_init() | ||
235 | #endif | ||
206 | 236 | ||
207 | static struct rt_hash_bucket *rt_hash_table; | 237 | static struct rt_hash_bucket *rt_hash_table; |
208 | static unsigned rt_hash_mask; | 238 | static unsigned rt_hash_mask; |
@@ -587,7 +617,7 @@ static void rt_check_expire(unsigned long dummy) | |||
587 | i = (i + 1) & rt_hash_mask; | 617 | i = (i + 1) & rt_hash_mask; |
588 | rthp = &rt_hash_table[i].chain; | 618 | rthp = &rt_hash_table[i].chain; |
589 | 619 | ||
590 | spin_lock(&rt_hash_table[i].lock); | 620 | spin_lock(rt_hash_lock_addr(i)); |
591 | while ((rth = *rthp) != NULL) { | 621 | while ((rth = *rthp) != NULL) { |
592 | if (rth->u.dst.expires) { | 622 | if (rth->u.dst.expires) { |
593 | /* Entry is expired even if it is in use */ | 623 | /* Entry is expired even if it is in use */ |
@@ -620,7 +650,7 @@ static void rt_check_expire(unsigned long dummy) | |||
620 | rt_free(rth); | 650 | rt_free(rth); |
621 | #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | 651 | #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ |
622 | } | 652 | } |
623 | spin_unlock(&rt_hash_table[i].lock); | 653 | spin_unlock(rt_hash_lock_addr(i)); |
624 | 654 | ||
625 | /* Fallback loop breaker. */ | 655 | /* Fallback loop breaker. */ |
626 | if (time_after(jiffies, now)) | 656 | if (time_after(jiffies, now)) |
@@ -643,11 +673,11 @@ static void rt_run_flush(unsigned long dummy) | |||
643 | get_random_bytes(&rt_hash_rnd, 4); | 673 | get_random_bytes(&rt_hash_rnd, 4); |
644 | 674 | ||
645 | for (i = rt_hash_mask; i >= 0; i--) { | 675 | for (i = rt_hash_mask; i >= 0; i--) { |
646 | spin_lock_bh(&rt_hash_table[i].lock); | 676 | spin_lock_bh(rt_hash_lock_addr(i)); |
647 | rth = rt_hash_table[i].chain; | 677 | rth = rt_hash_table[i].chain; |
648 | if (rth) | 678 | if (rth) |
649 | rt_hash_table[i].chain = NULL; | 679 | rt_hash_table[i].chain = NULL; |
650 | spin_unlock_bh(&rt_hash_table[i].lock); | 680 | spin_unlock_bh(rt_hash_lock_addr(i)); |
651 | 681 | ||
652 | for (; rth; rth = next) { | 682 | for (; rth; rth = next) { |
653 | next = rth->u.rt_next; | 683 | next = rth->u.rt_next; |
@@ -780,7 +810,7 @@ static int rt_garbage_collect(void) | |||
780 | 810 | ||
781 | k = (k + 1) & rt_hash_mask; | 811 | k = (k + 1) & rt_hash_mask; |
782 | rthp = &rt_hash_table[k].chain; | 812 | rthp = &rt_hash_table[k].chain; |
783 | spin_lock_bh(&rt_hash_table[k].lock); | 813 | spin_lock_bh(rt_hash_lock_addr(k)); |
784 | while ((rth = *rthp) != NULL) { | 814 | while ((rth = *rthp) != NULL) { |
785 | if (!rt_may_expire(rth, tmo, expire)) { | 815 | if (!rt_may_expire(rth, tmo, expire)) { |
786 | tmo >>= 1; | 816 | tmo >>= 1; |
@@ -812,7 +842,7 @@ static int rt_garbage_collect(void) | |||
812 | goal--; | 842 | goal--; |
813 | #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | 843 | #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ |
814 | } | 844 | } |
815 | spin_unlock_bh(&rt_hash_table[k].lock); | 845 | spin_unlock_bh(rt_hash_lock_addr(k)); |
816 | if (goal <= 0) | 846 | if (goal <= 0) |
817 | break; | 847 | break; |
818 | } | 848 | } |
@@ -882,7 +912,7 @@ restart: | |||
882 | 912 | ||
883 | rthp = &rt_hash_table[hash].chain; | 913 | rthp = &rt_hash_table[hash].chain; |
884 | 914 | ||
885 | spin_lock_bh(&rt_hash_table[hash].lock); | 915 | spin_lock_bh(rt_hash_lock_addr(hash)); |
886 | while ((rth = *rthp) != NULL) { | 916 | while ((rth = *rthp) != NULL) { |
887 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | 917 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED |
888 | if (!(rth->u.dst.flags & DST_BALANCED) && | 918 | if (!(rth->u.dst.flags & DST_BALANCED) && |
@@ -908,7 +938,7 @@ restart: | |||
908 | rth->u.dst.__use++; | 938 | rth->u.dst.__use++; |
909 | dst_hold(&rth->u.dst); | 939 | dst_hold(&rth->u.dst); |
910 | rth->u.dst.lastuse = now; | 940 | rth->u.dst.lastuse = now; |
911 | spin_unlock_bh(&rt_hash_table[hash].lock); | 941 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
912 | 942 | ||
913 | rt_drop(rt); | 943 | rt_drop(rt); |
914 | *rp = rth; | 944 | *rp = rth; |
@@ -949,7 +979,7 @@ restart: | |||
949 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | 979 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { |
950 | int err = arp_bind_neighbour(&rt->u.dst); | 980 | int err = arp_bind_neighbour(&rt->u.dst); |
951 | if (err) { | 981 | if (err) { |
952 | spin_unlock_bh(&rt_hash_table[hash].lock); | 982 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
953 | 983 | ||
954 | if (err != -ENOBUFS) { | 984 | if (err != -ENOBUFS) { |
955 | rt_drop(rt); | 985 | rt_drop(rt); |
@@ -990,7 +1020,7 @@ restart: | |||
990 | } | 1020 | } |
991 | #endif | 1021 | #endif |
992 | rt_hash_table[hash].chain = rt; | 1022 | rt_hash_table[hash].chain = rt; |
993 | spin_unlock_bh(&rt_hash_table[hash].lock); | 1023 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
994 | *rp = rt; | 1024 | *rp = rt; |
995 | return 0; | 1025 | return 0; |
996 | } | 1026 | } |
@@ -1058,7 +1088,7 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
1058 | { | 1088 | { |
1059 | struct rtable **rthp; | 1089 | struct rtable **rthp; |
1060 | 1090 | ||
1061 | spin_lock_bh(&rt_hash_table[hash].lock); | 1091 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1062 | ip_rt_put(rt); | 1092 | ip_rt_put(rt); |
1063 | for (rthp = &rt_hash_table[hash].chain; *rthp; | 1093 | for (rthp = &rt_hash_table[hash].chain; *rthp; |
1064 | rthp = &(*rthp)->u.rt_next) | 1094 | rthp = &(*rthp)->u.rt_next) |
@@ -1067,7 +1097,7 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
1067 | rt_free(rt); | 1097 | rt_free(rt); |
1068 | break; | 1098 | break; |
1069 | } | 1099 | } |
1070 | spin_unlock_bh(&rt_hash_table[hash].lock); | 1100 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1071 | } | 1101 | } |
1072 | 1102 | ||
1073 | void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, | 1103 | void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, |
@@ -3073,7 +3103,7 @@ __setup("rhash_entries=", set_rhash_entries); | |||
3073 | 3103 | ||
3074 | int __init ip_rt_init(void) | 3104 | int __init ip_rt_init(void) |
3075 | { | 3105 | { |
3076 | int i, order, goal, rc = 0; | 3106 | int order, goal, rc = 0; |
3077 | 3107 | ||
3078 | rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ | 3108 | rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ |
3079 | (jiffies ^ (jiffies >> 7))); | 3109 | (jiffies ^ (jiffies >> 7))); |
@@ -3122,10 +3152,8 @@ int __init ip_rt_init(void) | |||
3122 | /* NOTHING */; | 3152 | /* NOTHING */; |
3123 | 3153 | ||
3124 | rt_hash_mask--; | 3154 | rt_hash_mask--; |
3125 | for (i = 0; i <= rt_hash_mask; i++) { | 3155 | memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); |
3126 | spin_lock_init(&rt_hash_table[i].lock); | 3156 | rt_hash_lock_init(); |
3127 | rt_hash_table[i].chain = NULL; | ||
3128 | } | ||
3129 | 3157 | ||
3130 | ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); | 3158 | ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); |
3131 | ip_rt_max_size = (rt_hash_mask + 1) * 16; | 3159 | ip_rt_max_size = (rt_hash_mask + 1) * 16; |