diff options
author | Greg Banks <gnb@melbourne.sgi.com> | 2006-10-02 05:17:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-02 10:57:19 -0400 |
commit | 36bdfc8bae51339aa27ef8e4ce148185293061ae (patch) | |
tree | deab54ff70d6991c1e5be0d9efe97d10f65375b0 /net/sunrpc | |
parent | 4a3ae42dc312dbdffee803efaf393421b79f997a (diff) |
[PATCH] knfsd: move tempsock aging to a timer
Following are 11 patches from Greg Banks which combine to make knfsd more
NUMA-aware. They reduce hitting on 'global' data structures, and create some
data-structures that can be node-local.
knfsd threads are bound to a particular node, and the thread to handle a new
request is chosen from the threads that are attached to the node that received
the interrupt.
The distribution of threads across nodes can be controlled by a new file in
the 'nfsd' filesystem, though the default approach of an even spread is
probably fine for most sites.
Some (old) numbers that show the efficacy of these patches: N == number of
NICs == number of CPUs == number of clients. Number of NUMA nodes == N/2
N      Throughput, MiB/s    CPU usage, % (max=N*100)
       Before    After      Before    After
---    ------    ----       -----     -----
4      312       435        350       228
6      500       656        501       418
8      562       804        690       589
This patch:
Move the aging of RPC/TCP connection sockets from the main svc_recv() loop to
a timer which uses a mark-and-sweep algorithm every 6 minutes. This reduces
the amount of work that needs to be done in the main RPC loop and the length
of time we need to hold the (effectively global) svc_serv->sv_lock.
[akpm@osdl.org: cleanup]
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/svc.c | 3 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 96 |
2 files changed, 73 insertions, 26 deletions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index eee45a58f3ee..0c2c52276285 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -59,6 +59,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, | |||
59 | INIT_LIST_HEAD(&serv->sv_sockets); | 59 | INIT_LIST_HEAD(&serv->sv_sockets); |
60 | INIT_LIST_HEAD(&serv->sv_tempsocks); | 60 | INIT_LIST_HEAD(&serv->sv_tempsocks); |
61 | INIT_LIST_HEAD(&serv->sv_permsocks); | 61 | INIT_LIST_HEAD(&serv->sv_permsocks); |
62 | init_timer(&serv->sv_temptimer); | ||
62 | spin_lock_init(&serv->sv_lock); | 63 | spin_lock_init(&serv->sv_lock); |
63 | 64 | ||
64 | /* Remove any stale portmap registrations */ | 65 | /* Remove any stale portmap registrations */ |
@@ -87,6 +88,8 @@ svc_destroy(struct svc_serv *serv) | |||
87 | } else | 88 | } else |
88 | printk("svc_destroy: no threads for serv=%p!\n", serv); | 89 | printk("svc_destroy: no threads for serv=%p!\n", serv); |
89 | 90 | ||
91 | del_timer_sync(&serv->sv_temptimer); | ||
92 | |||
90 | while (!list_empty(&serv->sv_tempsocks)) { | 93 | while (!list_empty(&serv->sv_tempsocks)) { |
91 | svsk = list_entry(serv->sv_tempsocks.next, | 94 | svsk = list_entry(serv->sv_tempsocks.next, |
92 | struct svc_sock, | 95 | struct svc_sock, |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index bc9bd189a540..9ba1a071ff06 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -74,6 +74,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); | |||
74 | static int svc_deferred_recv(struct svc_rqst *rqstp); | 74 | static int svc_deferred_recv(struct svc_rqst *rqstp); |
75 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | 75 | static struct cache_deferred_req *svc_defer(struct cache_req *req); |
76 | 76 | ||
77 | /* apparently the "standard" is that clients close | ||
78 | * idle connections after 5 minutes, servers after | ||
79 | * 6 minutes | ||
80 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
81 | */ | ||
82 | static int svc_conn_age_period = 6*60; | ||
83 | |||
77 | /* | 84 | /* |
78 | * Queue up an idle server thread. Must have serv->sv_lock held. | 85 | * Queue up an idle server thread. Must have serv->sv_lock held. |
79 | * Note: this is really a stack rather than a queue, so that we only | 86 | * Note: this is really a stack rather than a queue, so that we only |
@@ -1220,24 +1227,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) | |||
1220 | return -EINTR; | 1227 | return -EINTR; |
1221 | 1228 | ||
1222 | spin_lock_bh(&serv->sv_lock); | 1229 | spin_lock_bh(&serv->sv_lock); |
1223 | if (!list_empty(&serv->sv_tempsocks)) { | 1230 | if ((svsk = svc_sock_dequeue(serv)) != NULL) { |
1224 | svsk = list_entry(serv->sv_tempsocks.next, | ||
1225 | struct svc_sock, sk_list); | ||
1226 | /* apparently the "standard" is that clients close | ||
1227 | * idle connections after 5 minutes, servers after | ||
1228 | * 6 minutes | ||
1229 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
1230 | */ | ||
1231 | if (get_seconds() - svsk->sk_lastrecv < 6*60 | ||
1232 | || test_bit(SK_BUSY, &svsk->sk_flags)) | ||
1233 | svsk = NULL; | ||
1234 | } | ||
1235 | if (svsk) { | ||
1236 | set_bit(SK_BUSY, &svsk->sk_flags); | ||
1237 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1238 | rqstp->rq_sock = svsk; | ||
1239 | svsk->sk_inuse++; | ||
1240 | } else if ((svsk = svc_sock_dequeue(serv)) != NULL) { | ||
1241 | rqstp->rq_sock = svsk; | 1231 | rqstp->rq_sock = svsk; |
1242 | svsk->sk_inuse++; | 1232 | svsk->sk_inuse++; |
1243 | rqstp->rq_reserved = serv->sv_bufsz; | 1233 | rqstp->rq_reserved = serv->sv_bufsz; |
@@ -1282,13 +1272,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) | |||
1282 | return -EAGAIN; | 1272 | return -EAGAIN; |
1283 | } | 1273 | } |
1284 | svsk->sk_lastrecv = get_seconds(); | 1274 | svsk->sk_lastrecv = get_seconds(); |
1285 | if (test_bit(SK_TEMP, &svsk->sk_flags)) { | 1275 | clear_bit(SK_OLD, &svsk->sk_flags); |
1286 | /* push active sockets to end of list */ | ||
1287 | spin_lock_bh(&serv->sv_lock); | ||
1288 | if (!list_empty(&svsk->sk_list)) | ||
1289 | list_move_tail(&svsk->sk_list, &serv->sv_tempsocks); | ||
1290 | spin_unlock_bh(&serv->sv_lock); | ||
1291 | } | ||
1292 | 1276 | ||
1293 | rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; | 1277 | rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; |
1294 | rqstp->rq_chandle.defer = svc_defer; | 1278 | rqstp->rq_chandle.defer = svc_defer; |
@@ -1348,6 +1332,58 @@ svc_send(struct svc_rqst *rqstp) | |||
1348 | } | 1332 | } |
1349 | 1333 | ||
1350 | /* | 1334 | /* |
1335 | * Timer function to close old temporary sockets, using | ||
1336 | * a mark-and-sweep algorithm. | ||
1337 | */ | ||
1338 | static void | ||
1339 | svc_age_temp_sockets(unsigned long closure) | ||
1340 | { | ||
1341 | struct svc_serv *serv = (struct svc_serv *)closure; | ||
1342 | struct svc_sock *svsk; | ||
1343 | struct list_head *le, *next; | ||
1344 | LIST_HEAD(to_be_aged); | ||
1345 | |||
1346 | dprintk("svc_age_temp_sockets\n"); | ||
1347 | |||
1348 | if (!spin_trylock_bh(&serv->sv_lock)) { | ||
1349 | /* busy, try again 1 sec later */ | ||
1350 | dprintk("svc_age_temp_sockets: busy\n"); | ||
1351 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | ||
1352 | return; | ||
1353 | } | ||
1354 | |||
1355 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | ||
1356 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
1357 | |||
1358 | if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) | ||
1359 | continue; | ||
1360 | if (svsk->sk_inuse || test_bit(SK_BUSY, &svsk->sk_flags)) | ||
1361 | continue; | ||
1362 | svsk->sk_inuse++; | ||
1363 | list_move(le, &to_be_aged); | ||
1364 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1365 | set_bit(SK_DETACHED, &svsk->sk_flags); | ||
1366 | } | ||
1367 | spin_unlock_bh(&serv->sv_lock); | ||
1368 | |||
1369 | while (!list_empty(&to_be_aged)) { | ||
1370 | le = to_be_aged.next; | ||
1371 | /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */ | ||
1372 | list_del_init(le); | ||
1373 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
1374 | |||
1375 | dprintk("queuing svsk %p for closing, %lu seconds old\n", | ||
1376 | svsk, get_seconds() - svsk->sk_lastrecv); | ||
1377 | |||
1378 | /* a thread will dequeue and close it soon */ | ||
1379 | svc_sock_enqueue(svsk); | ||
1380 | svc_sock_put(svsk); | ||
1381 | } | ||
1382 | |||
1383 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | ||
1384 | } | ||
1385 | |||
1386 | /* | ||
1351 | * Initialize socket for RPC use and create svc_sock struct | 1387 | * Initialize socket for RPC use and create svc_sock struct |
1352 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. | 1388 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. |
1353 | */ | 1389 | */ |
@@ -1400,6 +1436,13 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, | |||
1400 | set_bit(SK_TEMP, &svsk->sk_flags); | 1436 | set_bit(SK_TEMP, &svsk->sk_flags); |
1401 | list_add(&svsk->sk_list, &serv->sv_tempsocks); | 1437 | list_add(&svsk->sk_list, &serv->sv_tempsocks); |
1402 | serv->sv_tmpcnt++; | 1438 | serv->sv_tmpcnt++; |
1439 | if (serv->sv_temptimer.function == NULL) { | ||
1440 | /* setup timer to age temp sockets */ | ||
1441 | setup_timer(&serv->sv_temptimer, svc_age_temp_sockets, | ||
1442 | (unsigned long)serv); | ||
1443 | mod_timer(&serv->sv_temptimer, | ||
1444 | jiffies + svc_conn_age_period * HZ); | ||
1445 | } | ||
1403 | } else { | 1446 | } else { |
1404 | clear_bit(SK_TEMP, &svsk->sk_flags); | 1447 | clear_bit(SK_TEMP, &svsk->sk_flags); |
1405 | list_add(&svsk->sk_list, &serv->sv_permsocks); | 1448 | list_add(&svsk->sk_list, &serv->sv_permsocks); |
@@ -1513,7 +1556,8 @@ svc_delete_socket(struct svc_sock *svsk) | |||
1513 | 1556 | ||
1514 | spin_lock_bh(&serv->sv_lock); | 1557 | spin_lock_bh(&serv->sv_lock); |
1515 | 1558 | ||
1516 | list_del_init(&svsk->sk_list); | 1559 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) |
1560 | list_del_init(&svsk->sk_list); | ||
1517 | list_del_init(&svsk->sk_ready); | 1561 | list_del_init(&svsk->sk_ready); |
1518 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) | 1562 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) |
1519 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | 1563 | if (test_bit(SK_TEMP, &svsk->sk_flags)) |