about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/inet_connection_sock.c
diff options
context:
space:
mode:
author    Eric Dumazet <edumazet@google.com>    2016-02-11 19:28:50 -0500
committer David S. Miller <davem@davemloft.net> 2016-02-12 05:28:32 -0500
commit ea8add2b190395408b22a9127bed2c0912aecbc8 (patch)
tree 8b75745c913c33eba2707c3dc5fb0e312bfe3387 /net/ipv4/inet_connection_sock.c
parent 1580ab63fc9a03593072cc5656167a75c4f1d173 (diff)
tcp/dccp: better use of ephemeral ports in bind()
Implement strategy used in __inet_hash_connect() in opposite way : Try to find a candidate using odd ports, then fallback to even ports. We no longer disable BH for whole traversal, but one bucket at a time. We also use cond_resched() to yield cpu to other tasks if needed. I removed one indentation level and tried to mirror the loop we have in __inet_hash_connect() and variable names to ease code maintenance. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_connection_sock.c')
-rw-r--r--   net/ipv4/inet_connection_sock.c   240
1 files changed, 114 insertions(+), 126 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c16a2e6273d9..3d28c6d5c3c3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -91,165 +91,153 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
91 91
92/* Obtain a reference to a local port for the given sock, 92/* Obtain a reference to a local port for the given sock,
93 * if snum is zero it means select any available local port. 93 * if snum is zero it means select any available local port.
94 * We try to allocate an odd port (and leave even ports for connect())
94 */ 95 */
95int inet_csk_get_port(struct sock *sk, unsigned short snum) 96int inet_csk_get_port(struct sock *sk, unsigned short snum)
96{ 97{
97 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 98 bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
99 struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
100 int ret = 1, attempts = 5, port = snum;
101 int smallest_size = -1, smallest_port;
98 struct inet_bind_hashbucket *head; 102 struct inet_bind_hashbucket *head;
99 struct inet_bind_bucket *tb;
100 int ret, attempts = 5;
101 struct net *net = sock_net(sk); 103 struct net *net = sock_net(sk);
102 int smallest_size = -1, smallest_rover; 104 int i, low, high, attempt_half;
105 struct inet_bind_bucket *tb;
103 kuid_t uid = sock_i_uid(sk); 106 kuid_t uid = sock_i_uid(sk);
104 int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; 107 u32 remaining, offset;
105 108
106 local_bh_disable(); 109 if (port) {
107 if (!snum) { 110have_port:
108 int remaining, rover, low, high; 111 head = &hinfo->bhash[inet_bhashfn(net, port,
112 hinfo->bhash_size)];
113 spin_lock_bh(&head->lock);
114 inet_bind_bucket_for_each(tb, &head->chain)
115 if (net_eq(ib_net(tb), net) && tb->port == port)
116 goto tb_found;
109 117
118 goto tb_not_found;
119 }
110again: 120again:
111 inet_get_local_port_range(net, &low, &high); 121 attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
112 if (attempt_half) { 122other_half_scan:
113 int half = low + ((high - low) >> 1); 123 inet_get_local_port_range(net, &low, &high);
114 124 high++; /* [32768, 60999] -> [32768, 61000[ */
115 if (attempt_half == 1) 125 if (high - low < 4)
116 high = half; 126 attempt_half = 0;
117 else 127 if (attempt_half) {
118 low = half; 128 int half = low + (((high - low) >> 2) << 1);
119 } 129
120 remaining = (high - low) + 1; 130 if (attempt_half == 1)
121 smallest_rover = rover = prandom_u32() % remaining + low; 131 high = half;
122 132 else
123 smallest_size = -1; 133 low = half;
124 do { 134 }
125 if (inet_is_local_reserved_port(net, rover)) 135 remaining = high - low;
126 goto next_nolock; 136 if (likely(remaining > 1))
127 head = &hashinfo->bhash[inet_bhashfn(net, rover, 137 remaining &= ~1U;
128 hashinfo->bhash_size)]; 138
129 spin_lock(&head->lock); 139 offset = prandom_u32() % remaining;
130 inet_bind_bucket_for_each(tb, &head->chain) 140 /* __inet_hash_connect() favors ports having @low parity
131 if (net_eq(ib_net(tb), net) && tb->port == rover) { 141 * We do the opposite to not pollute connect() users.
132 if (((tb->fastreuse > 0 && 142 */
133 sk->sk_reuse && 143 offset |= 1U;
134 sk->sk_state != TCP_LISTEN) || 144 smallest_size = -1;
135 (tb->fastreuseport > 0 && 145 smallest_port = low; /* avoid compiler warning */
136 sk->sk_reuseport && 146
137 !rcu_access_pointer(sk->sk_reuseport_cb) && 147other_parity_scan:
138 uid_eq(tb->fastuid, uid))) && 148 port = low + offset;
139 (tb->num_owners < smallest_size || smallest_size == -1)) { 149 for (i = 0; i < remaining; i += 2, port += 2) {
140 smallest_size = tb->num_owners; 150 if (unlikely(port >= high))
141 smallest_rover = rover; 151 port -= remaining;
142 } 152 if (inet_is_local_reserved_port(net, port))
143 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { 153 continue;
144 snum = rover; 154 head = &hinfo->bhash[inet_bhashfn(net, port,
145 goto tb_found; 155 hinfo->bhash_size)];
146 } 156 spin_lock_bh(&head->lock);
147 goto next; 157 inet_bind_bucket_for_each(tb, &head->chain)
158 if (net_eq(ib_net(tb), net) && tb->port == port) {
159 if (((tb->fastreuse > 0 && reuse) ||
160 (tb->fastreuseport > 0 &&
161 sk->sk_reuseport &&
162 !rcu_access_pointer(sk->sk_reuseport_cb) &&
163 uid_eq(tb->fastuid, uid))) &&
164 (tb->num_owners < smallest_size || smallest_size == -1)) {
165 smallest_size = tb->num_owners;
166 smallest_port = port;
148 } 167 }
149 break; 168 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))
150 next: 169 goto tb_found;
151 spin_unlock(&head->lock); 170 goto next_port;
152 next_nolock:
153 if (++rover > high)
154 rover = low;
155 } while (--remaining > 0);
156
157 /* Exhausted local port range during search? It is not
158 * possible for us to be holding one of the bind hash
159 * locks if this test triggers, because if 'remaining'
160 * drops to zero, we broke out of the do/while loop at
161 * the top level, not from the 'break;' statement.
162 */
163 ret = 1;
164 if (remaining <= 0) {
165 if (smallest_size != -1) {
166 snum = smallest_rover;
167 goto have_snum;
168 } 171 }
169 if (attempt_half == 1) { 172 goto tb_not_found;
170 /* OK we now try the upper half of the range */ 173next_port:
171 attempt_half = 2; 174 spin_unlock_bh(&head->lock);
172 goto again; 175 cond_resched();
173 }
174 goto fail;
175 }
176 /* OK, here is the one we will use. HEAD is
177 * non-NULL and we hold it's mutex.
178 */
179 snum = rover;
180 } else {
181have_snum:
182 head = &hashinfo->bhash[inet_bhashfn(net, snum,
183 hashinfo->bhash_size)];
184 spin_lock(&head->lock);
185 inet_bind_bucket_for_each(tb, &head->chain)
186 if (net_eq(ib_net(tb), net) && tb->port == snum)
187 goto tb_found;
188 } 176 }
189 tb = NULL; 177
190 goto tb_not_found; 178 if (smallest_size != -1) {
179 port = smallest_port;
180 goto have_port;
181 }
182 offset--;
183 if (!(offset & 1))
184 goto other_parity_scan;
185
186 if (attempt_half == 1) {
187 /* OK we now try the upper half of the range */
188 attempt_half = 2;
189 goto other_half_scan;
190 }
191 return ret;
192
193tb_not_found:
194 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
195 net, head, port);
196 if (!tb)
197 goto fail_unlock;
191tb_found: 198tb_found:
192 if (!hlist_empty(&tb->owners)) { 199 if (!hlist_empty(&tb->owners)) {
193 if (sk->sk_reuse == SK_FORCE_REUSE) 200 if (sk->sk_reuse == SK_FORCE_REUSE)
194 goto success; 201 goto success;
195 202
196 if (((tb->fastreuse > 0 && 203 if (((tb->fastreuse > 0 && reuse) ||
197 sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
198 (tb->fastreuseport > 0 && 204 (tb->fastreuseport > 0 &&
199 sk->sk_reuseport && 205 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
200 !rcu_access_pointer(sk->sk_reuseport_cb) && 206 smallest_size == -1)
201 uid_eq(tb->fastuid, uid))) && smallest_size == -1) {
202 goto success; 207 goto success;
203 } else { 208 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
204 ret = 1; 209 if ((reuse ||
205 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { 210 (tb->fastreuseport > 0 &&
206 if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || 211 sk->sk_reuseport &&
207 (tb->fastreuseport > 0 && 212 !rcu_access_pointer(sk->sk_reuseport_cb) &&
208 sk->sk_reuseport && 213 uid_eq(tb->fastuid, uid))) &&
209 !rcu_access_pointer(sk->sk_reuseport_cb) && 214 smallest_size != -1 && --attempts >= 0) {
210 uid_eq(tb->fastuid, uid))) && 215 spin_unlock_bh(&head->lock);
211 smallest_size != -1 && --attempts >= 0) { 216 goto again;
212 spin_unlock(&head->lock);
213 goto again;
214 }
215
216 goto fail_unlock;
217 } 217 }
218 goto fail_unlock;
218 } 219 }
219 } 220 if (!reuse)
220tb_not_found:
221 ret = 1;
222 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
223 net, head, snum)) == NULL)
224 goto fail_unlock;
225 if (hlist_empty(&tb->owners)) {
226 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
227 tb->fastreuse = 1;
228 else
229 tb->fastreuse = 0; 221 tb->fastreuse = 0;
222 if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
223 tb->fastreuseport = 0;
224 } else {
225 tb->fastreuse = reuse;
230 if (sk->sk_reuseport) { 226 if (sk->sk_reuseport) {
231 tb->fastreuseport = 1; 227 tb->fastreuseport = 1;
232 tb->fastuid = uid; 228 tb->fastuid = uid;
233 } else 229 } else {
234 tb->fastreuseport = 0;
235 } else {
236 if (tb->fastreuse &&
237 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
238 tb->fastreuse = 0;
239 if (tb->fastreuseport &&
240 (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
241 tb->fastreuseport = 0; 230 tb->fastreuseport = 0;
231 }
242 } 232 }
243success: 233success:
244 if (!inet_csk(sk)->icsk_bind_hash) 234 if (!inet_csk(sk)->icsk_bind_hash)
245 inet_bind_hash(sk, tb, snum); 235 inet_bind_hash(sk, tb, port);
246 WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 236 WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
247 ret = 0; 237 ret = 0;
248 238
249fail_unlock: 239fail_unlock:
250 spin_unlock(&head->lock); 240 spin_unlock_bh(&head->lock);
251fail:
252 local_bh_enable();
253 return ret; 241 return ret;
254} 242}
255EXPORT_SYMBOL_GPL(inet_csk_get_port); 243EXPORT_SYMBOL_GPL(inet_csk_get_port);