author     Greg Banks <gnb@melbourne.sgi.com>        2006-10-02 05:17:58 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>     2006-10-02 10:57:19 -0400
commit     3262c816a3d7fb1eaabce633caa317887ed549ae (patch)
tree       5b635d8b62b9724ab2b1e5563aad37e35b894406 /net/sunrpc/svcsock.c
parent     c081a0c7cfe42adf8e8b9c2b8d0b2ec7f47603e8 (diff)
[PATCH] knfsd: split svc_serv into pools
Split out the list of idle threads and pending sockets from svc_serv into a
new svc_pool structure, and allocate a fixed number (in this patch, 1) of
pools per svc_serv.  The new structure contains a lock which takes over
several of the duties of svc_serv->sv_lock, which is now relegated to
protecting only sv_tempsocks, sv_permsocks, and sv_tmpcnt in svc_serv.

The point is to move the hottest fields out of svc_serv and into svc_pool,
allowing a following patch to arrange for a svc_pool per NUMA node or per
CPU.  This is a major step towards making the NFS server NUMA-friendly.

Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
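For orientation before reading the diff: the sketch below reconstructs, from the
identifiers this patch uses (sp_id, sp_lock, sp_threads, sp_sockets, sv_nrpools,
sv_pools), roughly what the new per-pool structure and the slimmed-down svc_serv
look like. It is an illustrative approximation only; the authoritative definitions
live in include/linux/sunrpc/svc.h and may differ in detail.

/* Illustrative sketch, not copied from the patch: field set inferred from
 * the identifiers referenced in the diff below. */
struct svc_pool {
        unsigned int            sp_id;          /* pool id, printed by svc_recv()'s dprintk */
        spinlock_t              sp_lock;        /* protects the two lists below */
        struct list_head        sp_threads;     /* idle threads, linked via rqstp->rq_list */
        struct list_head        sp_sockets;     /* pending sockets, linked via svsk->sk_ready */
};

struct svc_serv {
        /* ... existing fields ... */
        spinlock_t              sv_lock;        /* now only sv_tempsocks, sv_permsocks, sv_tmpcnt */
        unsigned int            sv_nrpools;     /* 1 in this patch */
        struct svc_pool         *sv_pools;      /* array of pools, indexed by sp_id */
};

With a single pool the behaviour is unchanged; the benefit arrives when a later
patch allocates one pool per NUMA node or per CPU, so that thread and socket
queueing only touch node-local cachelines. The diff also relies on two new
back-pointers, rqstp->rq_pool and svsk->sk_pool, linking each thread and
(transiently) each queued socket to its pool.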
Diffstat (limited to 'net/sunrpc/svcsock.c')
-rw-r--r--    net/sunrpc/svcsock.c    125
1 file changed, 80 insertions(+), 45 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a38df4589ae9..b78659adeff3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -46,7 +46,10 @@
 
 /* SMP locking strategy:
  *
- * svc_serv->sv_lock protects most stuff for that service.
+ * svc_pool->sp_lock protects most of the fields of that pool.
+ * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
+ * when both need to be taken (rare), svc_serv->sv_lock is first.
+ * BKL protects svc_serv->sv_nrthread.
  * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list
  * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
  *
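A minimal illustration (not part of the patch) of the lock ordering rule stated
in the comment above, for the rare case where a caller needs both locks; the
helper name and body are hypothetical:

/* Hypothetical example only: demonstrates the documented ordering,
 * svc_serv->sv_lock first, then svc_pool->sp_lock. */
static void example_touch_serv_and_pool(struct svc_serv *serv, struct svc_pool *pool)
{
        spin_lock_bh(&serv->sv_lock);   /* sv_tempsocks, sv_permsocks, sv_tmpcnt */
        spin_lock_bh(&pool->sp_lock);   /* this pool's thread and socket lists */

        /* ... inspect or update both serv-level and pool-level state ... */

        spin_unlock_bh(&pool->sp_lock);
        spin_unlock_bh(&serv->sv_lock);
}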
@@ -82,22 +85,22 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static int svc_conn_age_period = 6*60;
 
 /*
- * Queue up an idle server thread. Must have serv->sv_lock held.
+ * Queue up an idle server thread. Must have pool->sp_lock held.
  * Note: this is really a stack rather than a queue, so that we only
- * use as many different threads as we need, and the rest don't polute
+ * use as many different threads as we need, and the rest don't pollute
  * the cache.
  */
 static inline void
-svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
 {
-        list_add(&rqstp->rq_list, &serv->sv_threads);
+        list_add(&rqstp->rq_list, &pool->sp_threads);
 }
 
 /*
- * Dequeue an nfsd thread. Must have serv->sv_lock held.
+ * Dequeue an nfsd thread. Must have pool->sp_lock held.
  */
 static inline void
-svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 {
         list_del(&rqstp->rq_list);
 }
@@ -148,6 +151,7 @@ static void
 svc_sock_enqueue(struct svc_sock *svsk)
 {
         struct svc_serv *serv = svsk->sk_server;
+        struct svc_pool *pool = &serv->sv_pools[0];
         struct svc_rqst *rqstp;
 
         if (!(svsk->sk_flags &
@@ -156,10 +160,10 @@ svc_sock_enqueue(struct svc_sock *svsk)
         if (test_bit(SK_DEAD, &svsk->sk_flags))
                 return;
 
-        spin_lock_bh(&serv->sv_lock);
+        spin_lock_bh(&pool->sp_lock);
 
-        if (!list_empty(&serv->sv_threads) &&
-            !list_empty(&serv->sv_sockets))
+        if (!list_empty(&pool->sp_threads) &&
+            !list_empty(&pool->sp_sockets))
                 printk(KERN_ERR
                         "svc_sock_enqueue: threads and sockets both waiting??\n");
 
@@ -179,6 +183,8 @@ svc_sock_enqueue(struct svc_sock *svsk)
                 dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
                 goto out_unlock;
         }
+        BUG_ON(svsk->sk_pool != NULL);
+        svsk->sk_pool = pool;
 
         set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
         if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2
@@ -189,19 +195,20 @@ svc_sock_enqueue(struct svc_sock *svsk)
                 dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
                         svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz,
                         svc_sock_wspace(svsk));
+                svsk->sk_pool = NULL;
                 clear_bit(SK_BUSY, &svsk->sk_flags);
                 goto out_unlock;
         }
         clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 
 
-        if (!list_empty(&serv->sv_threads)) {
-                rqstp = list_entry(serv->sv_threads.next,
+        if (!list_empty(&pool->sp_threads)) {
+                rqstp = list_entry(pool->sp_threads.next,
                                    struct svc_rqst,
                                    rq_list);
                 dprintk("svc: socket %p served by daemon %p\n",
                         svsk->sk_sk, rqstp);
-                svc_serv_dequeue(serv, rqstp);
+                svc_thread_dequeue(pool, rqstp);
                 if (rqstp->rq_sock)
                         printk(KERN_ERR
                                 "svc_sock_enqueue: server %p, rq_sock=%p!\n",
@@ -210,28 +217,30 @@ svc_sock_enqueue(struct svc_sock *svsk)
                 atomic_inc(&svsk->sk_inuse);
                 rqstp->rq_reserved = serv->sv_bufsz;
                 atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
+                BUG_ON(svsk->sk_pool != pool);
                 wake_up(&rqstp->rq_wait);
         } else {
                 dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
-                list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
+                list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
+                BUG_ON(svsk->sk_pool != pool);
         }
 
 out_unlock:
-        spin_unlock_bh(&serv->sv_lock);
+        spin_unlock_bh(&pool->sp_lock);
 }
 
 /*
- * Dequeue the first socket. Must be called with the serv->sv_lock held.
+ * Dequeue the first socket. Must be called with the pool->sp_lock held.
  */
 static inline struct svc_sock *
-svc_sock_dequeue(struct svc_serv *serv)
+svc_sock_dequeue(struct svc_pool *pool)
 {
         struct svc_sock *svsk;
 
-        if (list_empty(&serv->sv_sockets))
+        if (list_empty(&pool->sp_sockets))
                 return NULL;
 
-        svsk = list_entry(serv->sv_sockets.next,
+        svsk = list_entry(pool->sp_sockets.next,
                           struct svc_sock, sk_ready);
         list_del_init(&svsk->sk_ready);
 
@@ -250,6 +259,7 @@ svc_sock_dequeue(struct svc_serv *serv)
 static inline void
 svc_sock_received(struct svc_sock *svsk)
 {
+        svsk->sk_pool = NULL;
         clear_bit(SK_BUSY, &svsk->sk_flags);
         svc_sock_enqueue(svsk);
 }
@@ -322,25 +332,33 @@ svc_sock_release(struct svc_rqst *rqstp)
 
 /*
  * External function to wake up a server waiting for data
+ * This really only makes sense for services like lockd
+ * which have exactly one thread anyway.
  */
 void
 svc_wake_up(struct svc_serv *serv)
 {
         struct svc_rqst *rqstp;
-
-        spin_lock_bh(&serv->sv_lock);
-        if (!list_empty(&serv->sv_threads)) {
-                rqstp = list_entry(serv->sv_threads.next,
-                                   struct svc_rqst,
-                                   rq_list);
-                dprintk("svc: daemon %p woken up.\n", rqstp);
-                /*
-                svc_serv_dequeue(serv, rqstp);
-                rqstp->rq_sock = NULL;
-                 */
-                wake_up(&rqstp->rq_wait);
+        unsigned int i;
+        struct svc_pool *pool;
+
+        for (i = 0; i < serv->sv_nrpools; i++) {
+                pool = &serv->sv_pools[i];
+
+                spin_lock_bh(&pool->sp_lock);
+                if (!list_empty(&pool->sp_threads)) {
+                        rqstp = list_entry(pool->sp_threads.next,
+                                           struct svc_rqst,
+                                           rq_list);
+                        dprintk("svc: daemon %p woken up.\n", rqstp);
+                        /*
+                        svc_thread_dequeue(pool, rqstp);
+                        rqstp->rq_sock = NULL;
+                         */
+                        wake_up(&rqstp->rq_wait);
+                }
+                spin_unlock_bh(&pool->sp_lock);
         }
-        spin_unlock_bh(&serv->sv_lock);
 }
 
 /*
@@ -603,7 +621,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
             /* udp sockets need large rcvbuf as all pending
              * requests are still in that buffer. sndbuf must
              * also be large enough that there is enough space
-             * for one reply per thread.
+             * for one reply per thread. We count all threads
+             * rather than threads in a particular pool, which
+             * provides an upper bound on the number of threads
+             * which will access the socket.
              */
             svc_sock_setbufsize(svsk->sk_sock,
                                 (serv->sv_nrthreads+3) * serv->sv_bufsz,
@@ -948,6 +969,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
                 /* sndbuf needs to have room for one request
                  * per thread, otherwise we can stall even when the
                  * network isn't a bottleneck.
+                 *
+                 * We count all threads rather than threads in a
+                 * particular pool, which provides an upper bound
+                 * on the number of threads which will access the socket.
+                 *
                  * rcvbuf just needs to be able to hold a few requests.
                  * Normally they will be removed from the queue
                  * as soon a a complete request arrives.
@@ -1163,13 +1189,16 @@ svc_sock_update_bufs(struct svc_serv *serv)
 }
 
 /*
- * Receive the next request on any socket.
+ * Receive the next request on any socket. This code is carefully
+ * organised not to touch any cachelines in the shared svc_serv
+ * structure, only cachelines in the local svc_pool.
  */
 int
 svc_recv(struct svc_rqst *rqstp, long timeout)
 {
         struct svc_sock         *svsk =NULL;
         struct svc_serv         *serv = rqstp->rq_server;
+        struct svc_pool         *pool = rqstp->rq_pool;
         int                     len;
         int                     pages;
         struct xdr_buf          *arg;
@@ -1219,15 +1248,15 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
         if (signalled())
                 return -EINTR;
 
-        spin_lock_bh(&serv->sv_lock);
-        if ((svsk = svc_sock_dequeue(serv)) != NULL) {
+        spin_lock_bh(&pool->sp_lock);
+        if ((svsk = svc_sock_dequeue(pool)) != NULL) {
                 rqstp->rq_sock = svsk;
                 atomic_inc(&svsk->sk_inuse);
                 rqstp->rq_reserved = serv->sv_bufsz;
                 atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
         } else {
                 /* No data pending. Go to sleep */
-                svc_serv_enqueue(serv, rqstp);
+                svc_thread_enqueue(pool, rqstp);
 
                 /*
                  * We have to be able to interrupt this wait
@@ -1235,26 +1264,26 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
                  */
                 set_current_state(TASK_INTERRUPTIBLE);
                 add_wait_queue(&rqstp->rq_wait, &wait);
-                spin_unlock_bh(&serv->sv_lock);
+                spin_unlock_bh(&pool->sp_lock);
 
                 schedule_timeout(timeout);
 
                 try_to_freeze();
 
-                spin_lock_bh(&serv->sv_lock);
+                spin_lock_bh(&pool->sp_lock);
                 remove_wait_queue(&rqstp->rq_wait, &wait);
 
                 if (!(svsk = rqstp->rq_sock)) {
-                        svc_serv_dequeue(serv, rqstp);
-                        spin_unlock_bh(&serv->sv_lock);
+                        svc_thread_dequeue(pool, rqstp);
+                        spin_unlock_bh(&pool->sp_lock);
                         dprintk("svc: server %p, no data yet\n", rqstp);
                         return signalled()? -EINTR : -EAGAIN;
                 }
         }
-        spin_unlock_bh(&serv->sv_lock);
+        spin_unlock_bh(&pool->sp_lock);
 
-        dprintk("svc: server %p, socket %p, inuse=%d\n",
-                 rqstp, svsk, atomic_read(&svsk->sk_inuse));
+        dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
+                 rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
         len = svsk->sk_recvfrom(rqstp);
         dprintk("svc: got len=%d\n", len);
 
@@ -1553,7 +1582,13 @@ svc_delete_socket(struct svc_sock *svsk)
 
         if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
                 list_del_init(&svsk->sk_list);
-        list_del_init(&svsk->sk_ready);
+        /*
+         * We used to delete the svc_sock from whichever list
+         * it's sk_ready node was on, but we don't actually
+         * need to. This is because the only time we're called
+         * while still attached to a queue, the queue itself
+         * is about to be destroyed (in svc_destroy).
+         */
         if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
                 if (test_bit(SK_TEMP, &svsk->sk_flags))
                         serv->sv_tmpcnt--;