diff options
Diffstat (limited to 'net/sunrpc/svcsock.c')
-rw-r--r-- | net/sunrpc/svcsock.c | 125 |
1 files changed, 80 insertions, 45 deletions
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a38df4589ae9..b78659adeff3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -46,7 +46,10 @@ | |||
46 | 46 | ||
47 | /* SMP locking strategy: | 47 | /* SMP locking strategy: |
48 | * | 48 | * |
49 | * svc_serv->sv_lock protects most stuff for that service. | 49 | * svc_pool->sp_lock protects most of the fields of that pool. |
50 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
51 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
52 | * BKL protects svc_serv->sv_nrthreads. | ||
50 | * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list | 53 | * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list |
51 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. | 54 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. |
52 | * | 55 | * |
@@ -82,22 +85,22 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req); | |||
82 | static int svc_conn_age_period = 6*60; | 85 | static int svc_conn_age_period = 6*60; |
83 | 86 | ||
84 | /* | 87 | /* |
85 | * Queue up an idle server thread. Must have serv->sv_lock held. | 88 | * Queue up an idle server thread. Must have pool->sp_lock held. |
86 | * Note: this is really a stack rather than a queue, so that we only | 89 | * Note: this is really a stack rather than a queue, so that we only |
87 | * use as many different threads as we need, and the rest don't polute | 90 | * use as many different threads as we need, and the rest don't pollute |
88 | * the cache. | 91 | * the cache. |
89 | */ | 92 | */ |
90 | static inline void | 93 | static inline void |
91 | svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp) | 94 | svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) |
92 | { | 95 | { |
93 | list_add(&rqstp->rq_list, &serv->sv_threads); | 96 | list_add(&rqstp->rq_list, &pool->sp_threads); |
94 | } | 97 | } |
95 | 98 | ||
96 | /* | 99 | /* |
97 | * Dequeue an nfsd thread. Must have serv->sv_lock held. | 100 | * Dequeue an nfsd thread. Must have pool->sp_lock held. |
98 | */ | 101 | */ |
99 | static inline void | 102 | static inline void |
100 | svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp) | 103 | svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) |
101 | { | 104 | { |
102 | list_del(&rqstp->rq_list); | 105 | list_del(&rqstp->rq_list); |
103 | } | 106 | } |
@@ -148,6 +151,7 @@ static void | |||
148 | svc_sock_enqueue(struct svc_sock *svsk) | 151 | svc_sock_enqueue(struct svc_sock *svsk) |
149 | { | 152 | { |
150 | struct svc_serv *serv = svsk->sk_server; | 153 | struct svc_serv *serv = svsk->sk_server; |
154 | struct svc_pool *pool = &serv->sv_pools[0]; | ||
151 | struct svc_rqst *rqstp; | 155 | struct svc_rqst *rqstp; |
152 | 156 | ||
153 | if (!(svsk->sk_flags & | 157 | if (!(svsk->sk_flags & |
@@ -156,10 +160,10 @@ svc_sock_enqueue(struct svc_sock *svsk) | |||
156 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | 160 | if (test_bit(SK_DEAD, &svsk->sk_flags)) |
157 | return; | 161 | return; |
158 | 162 | ||
159 | spin_lock_bh(&serv->sv_lock); | 163 | spin_lock_bh(&pool->sp_lock); |
160 | 164 | ||
161 | if (!list_empty(&serv->sv_threads) && | 165 | if (!list_empty(&pool->sp_threads) && |
162 | !list_empty(&serv->sv_sockets)) | 166 | !list_empty(&pool->sp_sockets)) |
163 | printk(KERN_ERR | 167 | printk(KERN_ERR |
164 | "svc_sock_enqueue: threads and sockets both waiting??\n"); | 168 | "svc_sock_enqueue: threads and sockets both waiting??\n"); |
165 | 169 | ||
@@ -179,6 +183,8 @@ svc_sock_enqueue(struct svc_sock *svsk) | |||
179 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); | 183 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); |
180 | goto out_unlock; | 184 | goto out_unlock; |
181 | } | 185 | } |
186 | BUG_ON(svsk->sk_pool != NULL); | ||
187 | svsk->sk_pool = pool; | ||
182 | 188 | ||
183 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | 189 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); |
184 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2 | 190 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2 |
@@ -189,19 +195,20 @@ svc_sock_enqueue(struct svc_sock *svsk) | |||
189 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", | 195 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", |
190 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz, | 196 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz, |
191 | svc_sock_wspace(svsk)); | 197 | svc_sock_wspace(svsk)); |
198 | svsk->sk_pool = NULL; | ||
192 | clear_bit(SK_BUSY, &svsk->sk_flags); | 199 | clear_bit(SK_BUSY, &svsk->sk_flags); |
193 | goto out_unlock; | 200 | goto out_unlock; |
194 | } | 201 | } |
195 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | 202 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); |
196 | 203 | ||
197 | 204 | ||
198 | if (!list_empty(&serv->sv_threads)) { | 205 | if (!list_empty(&pool->sp_threads)) { |
199 | rqstp = list_entry(serv->sv_threads.next, | 206 | rqstp = list_entry(pool->sp_threads.next, |
200 | struct svc_rqst, | 207 | struct svc_rqst, |
201 | rq_list); | 208 | rq_list); |
202 | dprintk("svc: socket %p served by daemon %p\n", | 209 | dprintk("svc: socket %p served by daemon %p\n", |
203 | svsk->sk_sk, rqstp); | 210 | svsk->sk_sk, rqstp); |
204 | svc_serv_dequeue(serv, rqstp); | 211 | svc_thread_dequeue(pool, rqstp); |
205 | if (rqstp->rq_sock) | 212 | if (rqstp->rq_sock) |
206 | printk(KERN_ERR | 213 | printk(KERN_ERR |
207 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", | 214 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", |
@@ -210,28 +217,30 @@ svc_sock_enqueue(struct svc_sock *svsk) | |||
210 | atomic_inc(&svsk->sk_inuse); | 217 | atomic_inc(&svsk->sk_inuse); |
211 | rqstp->rq_reserved = serv->sv_bufsz; | 218 | rqstp->rq_reserved = serv->sv_bufsz; |
212 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | 219 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); |
220 | BUG_ON(svsk->sk_pool != pool); | ||
213 | wake_up(&rqstp->rq_wait); | 221 | wake_up(&rqstp->rq_wait); |
214 | } else { | 222 | } else { |
215 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); | 223 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); |
216 | list_add_tail(&svsk->sk_ready, &serv->sv_sockets); | 224 | list_add_tail(&svsk->sk_ready, &pool->sp_sockets); |
225 | BUG_ON(svsk->sk_pool != pool); | ||
217 | } | 226 | } |
218 | 227 | ||
219 | out_unlock: | 228 | out_unlock: |
220 | spin_unlock_bh(&serv->sv_lock); | 229 | spin_unlock_bh(&pool->sp_lock); |
221 | } | 230 | } |
222 | 231 | ||
223 | /* | 232 | /* |
224 | * Dequeue the first socket. Must be called with the serv->sv_lock held. | 233 | * Dequeue the first socket. Must be called with the pool->sp_lock held. |
225 | */ | 234 | */ |
226 | static inline struct svc_sock * | 235 | static inline struct svc_sock * |
227 | svc_sock_dequeue(struct svc_serv *serv) | 236 | svc_sock_dequeue(struct svc_pool *pool) |
228 | { | 237 | { |
229 | struct svc_sock *svsk; | 238 | struct svc_sock *svsk; |
230 | 239 | ||
231 | if (list_empty(&serv->sv_sockets)) | 240 | if (list_empty(&pool->sp_sockets)) |
232 | return NULL; | 241 | return NULL; |
233 | 242 | ||
234 | svsk = list_entry(serv->sv_sockets.next, | 243 | svsk = list_entry(pool->sp_sockets.next, |
235 | struct svc_sock, sk_ready); | 244 | struct svc_sock, sk_ready); |
236 | list_del_init(&svsk->sk_ready); | 245 | list_del_init(&svsk->sk_ready); |
237 | 246 | ||
@@ -250,6 +259,7 @@ svc_sock_dequeue(struct svc_serv *serv) | |||
250 | static inline void | 259 | static inline void |
251 | svc_sock_received(struct svc_sock *svsk) | 260 | svc_sock_received(struct svc_sock *svsk) |
252 | { | 261 | { |
262 | svsk->sk_pool = NULL; | ||
253 | clear_bit(SK_BUSY, &svsk->sk_flags); | 263 | clear_bit(SK_BUSY, &svsk->sk_flags); |
254 | svc_sock_enqueue(svsk); | 264 | svc_sock_enqueue(svsk); |
255 | } | 265 | } |
@@ -322,25 +332,33 @@ svc_sock_release(struct svc_rqst *rqstp) | |||
322 | 332 | ||
323 | /* | 333 | /* |
324 | * External function to wake up a server waiting for data | 334 | * External function to wake up a server waiting for data |
335 | * This really only makes sense for services like lockd | ||
336 | * which have exactly one thread anyway. | ||
325 | */ | 337 | */ |
326 | void | 338 | void |
327 | svc_wake_up(struct svc_serv *serv) | 339 | svc_wake_up(struct svc_serv *serv) |
328 | { | 340 | { |
329 | struct svc_rqst *rqstp; | 341 | struct svc_rqst *rqstp; |
330 | 342 | unsigned int i; | |
331 | spin_lock_bh(&serv->sv_lock); | 343 | struct svc_pool *pool; |
332 | if (!list_empty(&serv->sv_threads)) { | 344 | |
333 | rqstp = list_entry(serv->sv_threads.next, | 345 | for (i = 0; i < serv->sv_nrpools; i++) { |
334 | struct svc_rqst, | 346 | pool = &serv->sv_pools[i]; |
335 | rq_list); | 347 | |
336 | dprintk("svc: daemon %p woken up.\n", rqstp); | 348 | spin_lock_bh(&pool->sp_lock); |
337 | /* | 349 | if (!list_empty(&pool->sp_threads)) { |
338 | svc_serv_dequeue(serv, rqstp); | 350 | rqstp = list_entry(pool->sp_threads.next, |
339 | rqstp->rq_sock = NULL; | 351 | struct svc_rqst, |
340 | */ | 352 | rq_list); |
341 | wake_up(&rqstp->rq_wait); | 353 | dprintk("svc: daemon %p woken up.\n", rqstp); |
354 | /* | ||
355 | svc_thread_dequeue(pool, rqstp); | ||
356 | rqstp->rq_sock = NULL; | ||
357 | */ | ||
358 | wake_up(&rqstp->rq_wait); | ||
359 | } | ||
360 | spin_unlock_bh(&pool->sp_lock); | ||
342 | } | 361 | } |
343 | spin_unlock_bh(&serv->sv_lock); | ||
344 | } | 362 | } |
345 | 363 | ||
346 | /* | 364 | /* |
@@ -603,7 +621,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
603 | /* udp sockets need large rcvbuf as all pending | 621 | /* udp sockets need large rcvbuf as all pending |
604 | * requests are still in that buffer. sndbuf must | 622 | * requests are still in that buffer. sndbuf must |
605 | * also be large enough that there is enough space | 623 | * also be large enough that there is enough space |
606 | * for one reply per thread. | 624 | * for one reply per thread. We count all threads |
625 | * rather than threads in a particular pool, which | ||
626 | * provides an upper bound on the number of threads | ||
627 | * which will access the socket. | ||
607 | */ | 628 | */ |
608 | svc_sock_setbufsize(svsk->sk_sock, | 629 | svc_sock_setbufsize(svsk->sk_sock, |
609 | (serv->sv_nrthreads+3) * serv->sv_bufsz, | 630 | (serv->sv_nrthreads+3) * serv->sv_bufsz, |
@@ -948,6 +969,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
948 | /* sndbuf needs to have room for one request | 969 | /* sndbuf needs to have room for one request |
949 | * per thread, otherwise we can stall even when the | 970 | * per thread, otherwise we can stall even when the |
950 | * network isn't a bottleneck. | 971 | * network isn't a bottleneck. |
972 | * | ||
973 | * We count all threads rather than threads in a | ||
974 | * particular pool, which provides an upper bound | ||
975 | * on the number of threads which will access the socket. | ||
976 | * | ||
951 | * rcvbuf just needs to be able to hold a few requests. | 977 | * rcvbuf just needs to be able to hold a few requests. |
952 | * Normally they will be removed from the queue | 978 | * Normally they will be removed from the queue |
953 | * as soon as a complete request arrives. | 979 | * as soon as a complete request arrives. |
@@ -1163,13 +1189,16 @@ svc_sock_update_bufs(struct svc_serv *serv) | |||
1163 | } | 1189 | } |
1164 | 1190 | ||
1165 | /* | 1191 | /* |
1166 | * Receive the next request on any socket. | 1192 | * Receive the next request on any socket. This code is carefully |
1193 | * organised not to touch any cachelines in the shared svc_serv | ||
1194 | * structure, only cachelines in the local svc_pool. | ||
1167 | */ | 1195 | */ |
1168 | int | 1196 | int |
1169 | svc_recv(struct svc_rqst *rqstp, long timeout) | 1197 | svc_recv(struct svc_rqst *rqstp, long timeout) |
1170 | { | 1198 | { |
1171 | struct svc_sock *svsk =NULL; | 1199 | struct svc_sock *svsk =NULL; |
1172 | struct svc_serv *serv = rqstp->rq_server; | 1200 | struct svc_serv *serv = rqstp->rq_server; |
1201 | struct svc_pool *pool = rqstp->rq_pool; | ||
1173 | int len; | 1202 | int len; |
1174 | int pages; | 1203 | int pages; |
1175 | struct xdr_buf *arg; | 1204 | struct xdr_buf *arg; |
@@ -1219,15 +1248,15 @@ svc_recv(struct svc_rqst *rqstp, long timeout) | |||
1219 | if (signalled()) | 1248 | if (signalled()) |
1220 | return -EINTR; | 1249 | return -EINTR; |
1221 | 1250 | ||
1222 | spin_lock_bh(&serv->sv_lock); | 1251 | spin_lock_bh(&pool->sp_lock); |
1223 | if ((svsk = svc_sock_dequeue(serv)) != NULL) { | 1252 | if ((svsk = svc_sock_dequeue(pool)) != NULL) { |
1224 | rqstp->rq_sock = svsk; | 1253 | rqstp->rq_sock = svsk; |
1225 | atomic_inc(&svsk->sk_inuse); | 1254 | atomic_inc(&svsk->sk_inuse); |
1226 | rqstp->rq_reserved = serv->sv_bufsz; | 1255 | rqstp->rq_reserved = serv->sv_bufsz; |
1227 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | 1256 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); |
1228 | } else { | 1257 | } else { |
1229 | /* No data pending. Go to sleep */ | 1258 | /* No data pending. Go to sleep */ |
1230 | svc_serv_enqueue(serv, rqstp); | 1259 | svc_thread_enqueue(pool, rqstp); |
1231 | 1260 | ||
1232 | /* | 1261 | /* |
1233 | * We have to be able to interrupt this wait | 1262 | * We have to be able to interrupt this wait |
@@ -1235,26 +1264,26 @@ svc_recv(struct svc_rqst *rqstp, long timeout) | |||
1235 | */ | 1264 | */ |
1236 | set_current_state(TASK_INTERRUPTIBLE); | 1265 | set_current_state(TASK_INTERRUPTIBLE); |
1237 | add_wait_queue(&rqstp->rq_wait, &wait); | 1266 | add_wait_queue(&rqstp->rq_wait, &wait); |
1238 | spin_unlock_bh(&serv->sv_lock); | 1267 | spin_unlock_bh(&pool->sp_lock); |
1239 | 1268 | ||
1240 | schedule_timeout(timeout); | 1269 | schedule_timeout(timeout); |
1241 | 1270 | ||
1242 | try_to_freeze(); | 1271 | try_to_freeze(); |
1243 | 1272 | ||
1244 | spin_lock_bh(&serv->sv_lock); | 1273 | spin_lock_bh(&pool->sp_lock); |
1245 | remove_wait_queue(&rqstp->rq_wait, &wait); | 1274 | remove_wait_queue(&rqstp->rq_wait, &wait); |
1246 | 1275 | ||
1247 | if (!(svsk = rqstp->rq_sock)) { | 1276 | if (!(svsk = rqstp->rq_sock)) { |
1248 | svc_serv_dequeue(serv, rqstp); | 1277 | svc_thread_dequeue(pool, rqstp); |
1249 | spin_unlock_bh(&serv->sv_lock); | 1278 | spin_unlock_bh(&pool->sp_lock); |
1250 | dprintk("svc: server %p, no data yet\n", rqstp); | 1279 | dprintk("svc: server %p, no data yet\n", rqstp); |
1251 | return signalled()? -EINTR : -EAGAIN; | 1280 | return signalled()? -EINTR : -EAGAIN; |
1252 | } | 1281 | } |
1253 | } | 1282 | } |
1254 | spin_unlock_bh(&serv->sv_lock); | 1283 | spin_unlock_bh(&pool->sp_lock); |
1255 | 1284 | ||
1256 | dprintk("svc: server %p, socket %p, inuse=%d\n", | 1285 | dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", |
1257 | rqstp, svsk, atomic_read(&svsk->sk_inuse)); | 1286 | rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); |
1258 | len = svsk->sk_recvfrom(rqstp); | 1287 | len = svsk->sk_recvfrom(rqstp); |
1259 | dprintk("svc: got len=%d\n", len); | 1288 | dprintk("svc: got len=%d\n", len); |
1260 | 1289 | ||
@@ -1553,7 +1582,13 @@ svc_delete_socket(struct svc_sock *svsk) | |||
1553 | 1582 | ||
1554 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) | 1583 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) |
1555 | list_del_init(&svsk->sk_list); | 1584 | list_del_init(&svsk->sk_list); |
1556 | list_del_init(&svsk->sk_ready); | 1585 | /* |
1586 | * We used to delete the svc_sock from whichever list | ||
1587 | * its sk_ready node was on, but we don't actually | ||
1588 | * need to. This is because the only time we're called | ||
1589 | * while still attached to a queue, the queue itself | ||
1590 | * is about to be destroyed (in svc_destroy). | ||
1591 | */ | ||
1557 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) | 1592 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) |
1558 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | 1593 | if (test_bit(SK_TEMP, &svsk->sk_flags)) |
1559 | serv->sv_tmpcnt--; | 1594 | serv->sv_tmpcnt--; |