diff options
Diffstat (limited to 'net/ipv6/inet6_hashtables.c')
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 182 |
1 files changed, 115 insertions, 67 deletions
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 1646a5658255..8fe267feb81e 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -25,26 +25,30 @@ | |||
25 | void __inet6_hash(struct sock *sk) | 25 | void __inet6_hash(struct sock *sk) |
26 | { | 26 | { |
27 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 27 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
28 | struct hlist_head *list; | ||
29 | rwlock_t *lock; | ||
30 | 28 | ||
31 | WARN_ON(!sk_unhashed(sk)); | 29 | WARN_ON(!sk_unhashed(sk)); |
32 | 30 | ||
33 | if (sk->sk_state == TCP_LISTEN) { | 31 | if (sk->sk_state == TCP_LISTEN) { |
34 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | 32 | struct inet_listen_hashbucket *ilb; |
35 | lock = &hashinfo->lhash_lock; | 33 | |
36 | inet_listen_wlock(hashinfo); | 34 | ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; |
35 | spin_lock(&ilb->lock); | ||
36 | __sk_nulls_add_node_rcu(sk, &ilb->head); | ||
37 | spin_unlock(&ilb->lock); | ||
37 | } else { | 38 | } else { |
38 | unsigned int hash; | 39 | unsigned int hash; |
40 | struct hlist_nulls_head *list; | ||
41 | spinlock_t *lock; | ||
42 | |||
39 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); | 43 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); |
40 | list = &inet_ehash_bucket(hashinfo, hash)->chain; | 44 | list = &inet_ehash_bucket(hashinfo, hash)->chain; |
41 | lock = inet_ehash_lockp(hashinfo, hash); | 45 | lock = inet_ehash_lockp(hashinfo, hash); |
42 | write_lock(lock); | 46 | spin_lock(lock); |
47 | __sk_nulls_add_node_rcu(sk, list); | ||
48 | spin_unlock(lock); | ||
43 | } | 49 | } |
44 | 50 | ||
45 | __sk_add_node(sk, list); | ||
46 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 51 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
47 | write_unlock(lock); | ||
48 | } | 52 | } |
49 | EXPORT_SYMBOL(__inet6_hash); | 53 | EXPORT_SYMBOL(__inet6_hash); |
50 | 54 | ||
@@ -63,77 +67,122 @@ struct sock *__inet6_lookup_established(struct net *net, | |||
63 | const int dif) | 67 | const int dif) |
64 | { | 68 | { |
65 | struct sock *sk; | 69 | struct sock *sk; |
66 | const struct hlist_node *node; | 70 | const struct hlist_nulls_node *node; |
67 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); | 71 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); |
68 | /* Optimize here for direct hit, only listening connections can | 72 | /* Optimize here for direct hit, only listening connections can |
69 | * have wildcards anyways. | 73 | * have wildcards anyways. |
70 | */ | 74 | */ |
71 | unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); | 75 | unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); |
72 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | 76 | unsigned int slot = hash & (hashinfo->ehash_size - 1); |
73 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); | 77 | struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; |
74 | 78 | ||
75 | prefetch(head->chain.first); | 79 | |
76 | read_lock(lock); | 80 | rcu_read_lock(); |
77 | sk_for_each(sk, node, &head->chain) { | 81 | begin: |
82 | sk_nulls_for_each_rcu(sk, node, &head->chain) { | ||
78 | /* For IPV6 do the cheaper port and family tests first. */ | 83 | /* For IPV6 do the cheaper port and family tests first. */ |
79 | if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) | 84 | if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { |
80 | goto hit; /* You sunk my battleship! */ | 85 | if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) |
86 | goto begintw; | ||
87 | if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { | ||
88 | sock_put(sk); | ||
89 | goto begin; | ||
90 | } | ||
91 | goto out; | ||
92 | } | ||
81 | } | 93 | } |
94 | if (get_nulls_value(node) != slot) | ||
95 | goto begin; | ||
96 | |||
97 | begintw: | ||
82 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | 98 | /* Must check for a TIME_WAIT'er before going to listener hash. */ |
83 | sk_for_each(sk, node, &head->twchain) { | 99 | sk_nulls_for_each_rcu(sk, node, &head->twchain) { |
84 | if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) | 100 | if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { |
85 | goto hit; | 101 | if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { |
102 | sk = NULL; | ||
103 | goto out; | ||
104 | } | ||
105 | if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { | ||
106 | sock_put(sk); | ||
107 | goto begintw; | ||
108 | } | ||
109 | goto out; | ||
110 | } | ||
86 | } | 111 | } |
87 | read_unlock(lock); | 112 | if (get_nulls_value(node) != slot) |
88 | return NULL; | 113 | goto begintw; |
89 | 114 | sk = NULL; | |
90 | hit: | 115 | out: |
91 | sock_hold(sk); | 116 | rcu_read_unlock(); |
92 | read_unlock(lock); | ||
93 | return sk; | 117 | return sk; |
94 | } | 118 | } |
95 | EXPORT_SYMBOL(__inet6_lookup_established); | 119 | EXPORT_SYMBOL(__inet6_lookup_established); |
96 | 120 | ||
121 | static int inline compute_score(struct sock *sk, struct net *net, | ||
122 | const unsigned short hnum, | ||
123 | const struct in6_addr *daddr, | ||
124 | const int dif) | ||
125 | { | ||
126 | int score = -1; | ||
127 | |||
128 | if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && | ||
129 | sk->sk_family == PF_INET6) { | ||
130 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
131 | |||
132 | score = 1; | ||
133 | if (!ipv6_addr_any(&np->rcv_saddr)) { | ||
134 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | ||
135 | return -1; | ||
136 | score++; | ||
137 | } | ||
138 | if (sk->sk_bound_dev_if) { | ||
139 | if (sk->sk_bound_dev_if != dif) | ||
140 | return -1; | ||
141 | score++; | ||
142 | } | ||
143 | } | ||
144 | return score; | ||
145 | } | ||
146 | |||
97 | struct sock *inet6_lookup_listener(struct net *net, | 147 | struct sock *inet6_lookup_listener(struct net *net, |
98 | struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, | 148 | struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, |
99 | const unsigned short hnum, const int dif) | 149 | const unsigned short hnum, const int dif) |
100 | { | 150 | { |
101 | struct sock *sk; | 151 | struct sock *sk; |
102 | const struct hlist_node *node; | 152 | const struct hlist_nulls_node *node; |
103 | struct sock *result = NULL; | 153 | struct sock *result; |
104 | int score, hiscore = 0; | 154 | int score, hiscore; |
105 | 155 | unsigned int hash = inet_lhashfn(net, hnum); | |
106 | read_lock(&hashinfo->lhash_lock); | 156 | struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; |
107 | sk_for_each(sk, node, | 157 | |
108 | &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { | 158 | rcu_read_lock(); |
109 | if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && | 159 | begin: |
110 | sk->sk_family == PF_INET6) { | 160 | result = NULL; |
111 | const struct ipv6_pinfo *np = inet6_sk(sk); | 161 | hiscore = -1; |
112 | 162 | sk_nulls_for_each(sk, node, &ilb->head) { | |
113 | score = 1; | 163 | score = compute_score(sk, net, hnum, daddr, dif); |
114 | if (!ipv6_addr_any(&np->rcv_saddr)) { | 164 | if (score > hiscore) { |
115 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | 165 | hiscore = score; |
116 | continue; | 166 | result = sk; |
117 | score++; | 167 | } |
118 | } | 168 | } |
119 | if (sk->sk_bound_dev_if) { | 169 | /* |
120 | if (sk->sk_bound_dev_if != dif) | 170 | * if the nulls value we got at the end of this lookup is |
121 | continue; | 171 | * not the expected one, we must restart lookup. |
122 | score++; | 172 | * We probably met an item that was moved to another chain. |
123 | } | 173 | */ |
124 | if (score == 3) { | 174 | if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) |
125 | result = sk; | 175 | goto begin; |
126 | break; | 176 | if (result) { |
127 | } | 177 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) |
128 | if (score > hiscore) { | 178 | result = NULL; |
129 | hiscore = score; | 179 | else if (unlikely(compute_score(result, net, hnum, daddr, |
130 | result = sk; | 180 | dif) < hiscore)) { |
131 | } | 181 | sock_put(result); |
182 | goto begin; | ||
132 | } | 183 | } |
133 | } | 184 | } |
134 | if (result) | 185 | rcu_read_unlock(); |
135 | sock_hold(result); | ||
136 | read_unlock(&hashinfo->lhash_lock); | ||
137 | return result; | 186 | return result; |
138 | } | 187 | } |
139 | 188 | ||
@@ -170,16 +219,15 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, | |||
170 | const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, | 219 | const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, |
171 | inet->dport); | 220 | inet->dport); |
172 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 221 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); |
173 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); | 222 | spinlock_t *lock = inet_ehash_lockp(hinfo, hash); |
174 | struct sock *sk2; | 223 | struct sock *sk2; |
175 | const struct hlist_node *node; | 224 | const struct hlist_nulls_node *node; |
176 | struct inet_timewait_sock *tw; | 225 | struct inet_timewait_sock *tw; |
177 | 226 | ||
178 | prefetch(head->chain.first); | 227 | spin_lock(lock); |
179 | write_lock(lock); | ||
180 | 228 | ||
181 | /* Check TIME-WAIT sockets first. */ | 229 | /* Check TIME-WAIT sockets first. */ |
182 | sk_for_each(sk2, node, &head->twchain) { | 230 | sk_nulls_for_each(sk2, node, &head->twchain) { |
183 | tw = inet_twsk(sk2); | 231 | tw = inet_twsk(sk2); |
184 | 232 | ||
185 | if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { | 233 | if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { |
@@ -192,7 +240,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, | |||
192 | tw = NULL; | 240 | tw = NULL; |
193 | 241 | ||
194 | /* And established part... */ | 242 | /* And established part... */ |
195 | sk_for_each(sk2, node, &head->chain) { | 243 | sk_nulls_for_each(sk2, node, &head->chain) { |
196 | if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) | 244 | if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) |
197 | goto not_unique; | 245 | goto not_unique; |
198 | } | 246 | } |
@@ -203,10 +251,10 @@ unique: | |||
203 | inet->num = lport; | 251 | inet->num = lport; |
204 | inet->sport = htons(lport); | 252 | inet->sport = htons(lport); |
205 | WARN_ON(!sk_unhashed(sk)); | 253 | WARN_ON(!sk_unhashed(sk)); |
206 | __sk_add_node(sk, &head->chain); | 254 | __sk_nulls_add_node_rcu(sk, &head->chain); |
207 | sk->sk_hash = hash; | 255 | sk->sk_hash = hash; |
256 | spin_unlock(lock); | ||
208 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 257 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
209 | write_unlock(lock); | ||
210 | 258 | ||
211 | if (twp != NULL) { | 259 | if (twp != NULL) { |
212 | *twp = tw; | 260 | *twp = tw; |
@@ -221,7 +269,7 @@ unique: | |||
221 | return 0; | 269 | return 0; |
222 | 270 | ||
223 | not_unique: | 271 | not_unique: |
224 | write_unlock(lock); | 272 | spin_unlock(lock); |
225 | return -EADDRNOTAVAIL; | 273 | return -EADDRNOTAVAIL; |
226 | } | 274 | } |
227 | 275 | ||