aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc/svcsock.c
diff options
context:
space:
mode:
authorGreg Banks <gnb@melbourne.sgi.com>2006-10-02 05:17:54 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-10-02 10:57:19 -0400
commit36bdfc8bae51339aa27ef8e4ce148185293061ae (patch)
treedeab54ff70d6991c1e5be0d9efe97d10f65375b0 /net/sunrpc/svcsock.c
parent4a3ae42dc312dbdffee803efaf393421b79f997a (diff)
[PATCH] knfsd: move tempsock aging to a timer
Following are 11 patches from Greg Banks which combine to make knfsd more NUMA-aware. They reduce hitting on 'global' data structures, and create some data structures that can be node-local.

knfsd threads are bound to a particular node, and the thread to handle a new request is chosen from the threads that are attached to the node that received the interrupt.

The distribution of threads across nodes can be controlled by a new file in the 'nfsd' filesystem, though the default approach of an even spread is probably fine for most sites.

Some (old) numbers that show the efficacy of these patches:

N == number of NICs == number of CPUs == number of clients.
Number of NUMA nodes == N/2

      Throughput, MiB/s     CPU usage, % (max=N*100)
 N    Before     After      Before     After
---   ------     -----      ------     -----
 4     312        435        350        228
 6     500        656        501        418
 8     562        804        690        589

This patch:

Move the aging of RPC/TCP connection sockets from the main svc_recv() loop to a timer which uses a mark-and-sweep algorithm every 6 minutes. This reduces the amount of work that needs to be done in the main RPC loop and the length of time we need to hold the (effectively global) svc_serv->sv_lock.

[akpm@osdl.org: cleanup]

Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'net/sunrpc/svcsock.c')
-rw-r--r--net/sunrpc/svcsock.c96
1 files changed, 70 insertions, 26 deletions
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index bc9bd189a540..9ba1a071ff06 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -74,6 +74,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
74static int svc_deferred_recv(struct svc_rqst *rqstp); 74static int svc_deferred_recv(struct svc_rqst *rqstp);
75static struct cache_deferred_req *svc_defer(struct cache_req *req); 75static struct cache_deferred_req *svc_defer(struct cache_req *req);
76 76
77/* apparently the "standard" is that clients close
78 * idle connections after 5 minutes, servers after
79 * 6 minutes, so this is the server-side aging period, in seconds (it is multiplied by HZ when the aging timer is armed)
80 * http://www.connectathon.org/talks96/nfstcp.pdf
81 */
82static int svc_conn_age_period = 6*60;
83
77/* 84/*
78 * Queue up an idle server thread. Must have serv->sv_lock held. 85 * Queue up an idle server thread. Must have serv->sv_lock held.
79 * Note: this is really a stack rather than a queue, so that we only 86 * Note: this is really a stack rather than a queue, so that we only
@@ -1220,24 +1227,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
1220 return -EINTR; 1227 return -EINTR;
1221 1228
1222 spin_lock_bh(&serv->sv_lock); 1229 spin_lock_bh(&serv->sv_lock);
1223 if (!list_empty(&serv->sv_tempsocks)) { 1230 if ((svsk = svc_sock_dequeue(serv)) != NULL) {
1224 svsk = list_entry(serv->sv_tempsocks.next,
1225 struct svc_sock, sk_list);
1226 /* apparently the "standard" is that clients close
1227 * idle connections after 5 minutes, servers after
1228 * 6 minutes
1229 * http://www.connectathon.org/talks96/nfstcp.pdf
1230 */
1231 if (get_seconds() - svsk->sk_lastrecv < 6*60
1232 || test_bit(SK_BUSY, &svsk->sk_flags))
1233 svsk = NULL;
1234 }
1235 if (svsk) {
1236 set_bit(SK_BUSY, &svsk->sk_flags);
1237 set_bit(SK_CLOSE, &svsk->sk_flags);
1238 rqstp->rq_sock = svsk;
1239 svsk->sk_inuse++;
1240 } else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
1241 rqstp->rq_sock = svsk; 1231 rqstp->rq_sock = svsk;
1242 svsk->sk_inuse++; 1232 svsk->sk_inuse++;
1243 rqstp->rq_reserved = serv->sv_bufsz; 1233 rqstp->rq_reserved = serv->sv_bufsz;
@@ -1282,13 +1272,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
1282 return -EAGAIN; 1272 return -EAGAIN;
1283 } 1273 }
1284 svsk->sk_lastrecv = get_seconds(); 1274 svsk->sk_lastrecv = get_seconds();
1285 if (test_bit(SK_TEMP, &svsk->sk_flags)) { 1275 clear_bit(SK_OLD, &svsk->sk_flags);
1286 /* push active sockets to end of list */
1287 spin_lock_bh(&serv->sv_lock);
1288 if (!list_empty(&svsk->sk_list))
1289 list_move_tail(&svsk->sk_list, &serv->sv_tempsocks);
1290 spin_unlock_bh(&serv->sv_lock);
1291 }
1292 1276
1293 rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; 1277 rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024;
1294 rqstp->rq_chandle.defer = svc_defer; 1278 rqstp->rq_chandle.defer = svc_defer;
@@ -1348,6 +1332,58 @@ svc_send(struct svc_rqst *rqstp)
1348} 1332}
1349 1333
1350/* 1334/*
1335 * Timer function to close old temporary sockets, using a mark-and-sweep algorithm: each run sets SK_OLD on every socket on
1336 * serv->sv_tempsocks and queues for closing the idle ones whose SK_OLD was already set. @closure is the struct svc_serv pointer, cast to unsigned long.
1337 */
1338static void
1339svc_age_temp_sockets(unsigned long closure)
1340{
1341 struct svc_serv *serv = (struct svc_serv *)closure;
1342 struct svc_sock *svsk;
1343 struct list_head *le, *next;
1344 LIST_HEAD(to_be_aged); /* private list of sockets picked for closing in this run */
1345
1346 dprintk("svc_age_temp_sockets\n");
1347
1348 if (!spin_trylock_bh(&serv->sv_lock)) { /* never wait for sv_lock here; re-arm and retry instead */
1349 /* busy, try again 1 sec later */
1350 dprintk("svc_age_temp_sockets: busy\n");
1351 mod_timer(&serv->sv_temptimer, jiffies + HZ);
1352 return;
1353 }
1354
1355 list_for_each_safe(le, next, &serv->sv_tempsocks) { /* _safe variant: entries are moved off the list inside the loop */
1356 svsk = list_entry(le, struct svc_sock, sk_list);
1357
1358 if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) /* mark: SK_OLD was clear, so only flag it on this pass */
1359 continue;
1360 if (svsk->sk_inuse || test_bit(SK_BUSY, &svsk->sk_flags)) /* still referenced or busy: leave it alone */
1361 continue;
1362 svsk->sk_inuse++; /* NOTE(review): presumably the reference dropped by svc_sock_put() below - confirm */
1363 list_move(le, &to_be_aged); /* sweep: take it off sv_tempsocks while sv_lock is held */
1364 set_bit(SK_CLOSE, &svsk->sk_flags);
1365 set_bit(SK_DETACHED, &svsk->sk_flags); /* records that sk_list is no longer on sv_tempsocks */
1366 }
1367 spin_unlock_bh(&serv->sv_lock);
1368
1369 while (!list_empty(&to_be_aged)) { /* runs without sv_lock; only the private list is touched */
1370 le = to_be_aged.next;
1371 /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */
1372 list_del_init(le);
1373 svsk = list_entry(le, struct svc_sock, sk_list);
1374
1375 dprintk("queuing svsk %p for closing, %lu seconds old\n",
1376 svsk, get_seconds() - svsk->sk_lastrecv);
1377
1378 /* a thread will dequeue and close it soon */
1379 svc_sock_enqueue(svsk);
1380 svc_sock_put(svsk);
1381 }
1382
1383 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); /* schedule the next pass one aging period from now */
1384}
1385
1386/*
1351 * Initialize socket for RPC use and create svc_sock struct 1387 * Initialize socket for RPC use and create svc_sock struct
1352 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. 1388 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
1353 */ 1389 */
@@ -1400,6 +1436,13 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
1400 set_bit(SK_TEMP, &svsk->sk_flags); 1436 set_bit(SK_TEMP, &svsk->sk_flags);
1401 list_add(&svsk->sk_list, &serv->sv_tempsocks); 1437 list_add(&svsk->sk_list, &serv->sv_tempsocks);
1402 serv->sv_tmpcnt++; 1438 serv->sv_tmpcnt++;
1439 if (serv->sv_temptimer.function == NULL) {
1440 /* setup timer to age temp sockets */
1441 setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
1442 (unsigned long)serv);
1443 mod_timer(&serv->sv_temptimer,
1444 jiffies + svc_conn_age_period * HZ);
1445 }
1403 } else { 1446 } else {
1404 clear_bit(SK_TEMP, &svsk->sk_flags); 1447 clear_bit(SK_TEMP, &svsk->sk_flags);
1405 list_add(&svsk->sk_list, &serv->sv_permsocks); 1448 list_add(&svsk->sk_list, &serv->sv_permsocks);
@@ -1513,7 +1556,8 @@ svc_delete_socket(struct svc_sock *svsk)
1513 1556
1514 spin_lock_bh(&serv->sv_lock); 1557 spin_lock_bh(&serv->sv_lock);
1515 1558
1516 list_del_init(&svsk->sk_list); 1559 if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
1560 list_del_init(&svsk->sk_list);
1517 list_del_init(&svsk->sk_ready); 1561 list_del_init(&svsk->sk_ready);
1518 if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) 1562 if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
1519 if (test_bit(SK_TEMP, &svsk->sk_flags)) 1563 if (test_bit(SK_TEMP, &svsk->sk_flags))