diff options
| author | Greg Banks <gnb@melbourne.sgi.com> | 2006-10-02 05:17:58 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-02 10:57:19 -0400 |
| commit | 3262c816a3d7fb1eaabce633caa317887ed549ae (patch) | |
| tree | 5b635d8b62b9724ab2b1e5563aad37e35b894406 | |
| parent | c081a0c7cfe42adf8e8b9c2b8d0b2ec7f47603e8 (diff) | |
[PATCH] knfsd: split svc_serv into pools
Split out the list of idle threads and pending sockets from svc_serv into a
new svc_pool structure, and allocate a fixed number (in this patch, 1) of
pools per svc_serv. The new structure contains a lock which takes over
several of the duties of svc_serv->sv_lock, which is now relegated to
protecting only sv_tempsocks, sv_permsocks, and sv_tmpcnt in svc_serv.
The point is to move the hottest fields out of svc_serv and into svc_pool,
allowing a following patch to arrange for a svc_pool per NUMA node or per CPU.
This is a major step towards making the NFS server NUMA-friendly.
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | include/linux/sunrpc/svc.h | 25 | ||||
| -rw-r--r-- | include/linux/sunrpc/svcsock.h | 1 | ||||
| -rw-r--r-- | net/sunrpc/svc.c | 56 | ||||
| -rw-r--r-- | net/sunrpc/svcsock.c | 125 |
4 files changed, 153 insertions, 54 deletions
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5eabded110..c27d806af3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h | |||
| @@ -17,6 +17,25 @@ | |||
| 17 | #include <linux/wait.h> | 17 | #include <linux/wait.h> |
| 18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
| 19 | 19 | ||
| 20 | |||
| 21 | /* | ||
| 22 | * | ||
| 23 | * RPC service thread pool. | ||
| 24 | * | ||
| 25 | * Pool of threads and temporary sockets. Generally there is only | ||
| 26 | * a single one of these per RPC service, but on NUMA machines those | ||
| 27 | * services that can benefit from it (i.e. nfs but not lockd) will | ||
| 28 | * have one pool per NUMA node. This optimisation reduces cross- | ||
| 29 | * node traffic on multi-node NUMA NFS servers. | ||
| 30 | */ | ||
| 31 | struct svc_pool { | ||
| 32 | unsigned int sp_id; /* pool id; also node id on NUMA */ | ||
| 33 | spinlock_t sp_lock; /* protects all fields */ | ||
| 34 | struct list_head sp_threads; /* idle server threads */ | ||
| 35 | struct list_head sp_sockets; /* pending sockets */ | ||
| 36 | unsigned int sp_nrthreads; /* # of threads in pool */ | ||
| 37 | } ____cacheline_aligned_in_smp; | ||
| 38 | |||
| 20 | /* | 39 | /* |
| 21 | * RPC service. | 40 | * RPC service. |
| 22 | * | 41 | * |
| @@ -28,8 +47,6 @@ | |||
| 28 | * We currently do not support more than one RPC program per daemon. | 47 | * We currently do not support more than one RPC program per daemon. |
| 29 | */ | 48 | */ |
| 30 | struct svc_serv { | 49 | struct svc_serv { |
| 31 | struct list_head sv_threads; /* idle server threads */ | ||
| 32 | struct list_head sv_sockets; /* pending sockets */ | ||
| 33 | struct svc_program * sv_program; /* RPC program */ | 50 | struct svc_program * sv_program; /* RPC program */ |
| 34 | struct svc_stat * sv_stats; /* RPC statistics */ | 51 | struct svc_stat * sv_stats; /* RPC statistics */ |
| 35 | spinlock_t sv_lock; | 52 | spinlock_t sv_lock; |
| @@ -44,6 +61,9 @@ struct svc_serv { | |||
| 44 | 61 | ||
| 45 | char * sv_name; /* service name */ | 62 | char * sv_name; /* service name */ |
| 46 | 63 | ||
| 64 | unsigned int sv_nrpools; /* number of thread pools */ | ||
| 65 | struct svc_pool * sv_pools; /* array of thread pools */ | ||
| 66 | |||
| 47 | void (*sv_shutdown)(struct svc_serv *serv); | 67 | void (*sv_shutdown)(struct svc_serv *serv); |
| 48 | /* Callback to use when last thread | 68 | /* Callback to use when last thread |
| 49 | * exits. | 69 | * exits. |
| @@ -138,6 +158,7 @@ struct svc_rqst { | |||
| 138 | int rq_addrlen; | 158 | int rq_addrlen; |
| 139 | 159 | ||
| 140 | struct svc_serv * rq_server; /* RPC service definition */ | 160 | struct svc_serv * rq_server; /* RPC service definition */ |
| 161 | struct svc_pool * rq_pool; /* thread pool */ | ||
| 141 | struct svc_procedure * rq_procinfo; /* procedure info */ | 162 | struct svc_procedure * rq_procinfo; /* procedure info */ |
| 142 | struct auth_ops * rq_authop; /* authentication flavour */ | 163 | struct auth_ops * rq_authop; /* authentication flavour */ |
| 143 | struct svc_cred rq_cred; /* auth info */ | 164 | struct svc_cred rq_cred; /* auth info */ |
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 7154e71c6d..4c296152cb 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h | |||
| @@ -20,6 +20,7 @@ struct svc_sock { | |||
| 20 | struct socket * sk_sock; /* berkeley socket layer */ | 20 | struct socket * sk_sock; /* berkeley socket layer */ |
| 21 | struct sock * sk_sk; /* INET layer */ | 21 | struct sock * sk_sk; /* INET layer */ |
| 22 | 22 | ||
| 23 | struct svc_pool * sk_pool; /* current pool iff queued */ | ||
| 23 | struct svc_serv * sk_server; /* service for this socket */ | 24 | struct svc_serv * sk_server; /* service for this socket */ |
| 24 | atomic_t sk_inuse; /* use count */ | 25 | atomic_t sk_inuse; /* use count */ |
| 25 | unsigned long sk_flags; | 26 | unsigned long sk_flags; |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 0c2c522762..6750cd474f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
| @@ -32,6 +32,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, | |||
| 32 | struct svc_serv *serv; | 32 | struct svc_serv *serv; |
| 33 | int vers; | 33 | int vers; |
| 34 | unsigned int xdrsize; | 34 | unsigned int xdrsize; |
| 35 | unsigned int i; | ||
| 35 | 36 | ||
| 36 | if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) | 37 | if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) |
| 37 | return NULL; | 38 | return NULL; |
| @@ -55,13 +56,33 @@ svc_create(struct svc_program *prog, unsigned int bufsize, | |||
| 55 | prog = prog->pg_next; | 56 | prog = prog->pg_next; |
| 56 | } | 57 | } |
| 57 | serv->sv_xdrsize = xdrsize; | 58 | serv->sv_xdrsize = xdrsize; |
| 58 | INIT_LIST_HEAD(&serv->sv_threads); | ||
| 59 | INIT_LIST_HEAD(&serv->sv_sockets); | ||
| 60 | INIT_LIST_HEAD(&serv->sv_tempsocks); | 59 | INIT_LIST_HEAD(&serv->sv_tempsocks); |
| 61 | INIT_LIST_HEAD(&serv->sv_permsocks); | 60 | INIT_LIST_HEAD(&serv->sv_permsocks); |
| 62 | init_timer(&serv->sv_temptimer); | 61 | init_timer(&serv->sv_temptimer); |
| 63 | spin_lock_init(&serv->sv_lock); | 62 | spin_lock_init(&serv->sv_lock); |
| 64 | 63 | ||
| 64 | serv->sv_nrpools = 1; | ||
| 65 | serv->sv_pools = | ||
| 66 | kcalloc(sizeof(struct svc_pool), serv->sv_nrpools, | ||
| 67 | GFP_KERNEL); | ||
| 68 | if (!serv->sv_pools) { | ||
| 69 | kfree(serv); | ||
| 70 | return NULL; | ||
| 71 | } | ||
| 72 | |||
| 73 | for (i = 0; i < serv->sv_nrpools; i++) { | ||
| 74 | struct svc_pool *pool = &serv->sv_pools[i]; | ||
| 75 | |||
| 76 | dprintk("initialising pool %u for %s\n", | ||
| 77 | i, serv->sv_name); | ||
| 78 | |||
| 79 | pool->sp_id = i; | ||
| 80 | INIT_LIST_HEAD(&pool->sp_threads); | ||
| 81 | INIT_LIST_HEAD(&pool->sp_sockets); | ||
| 82 | spin_lock_init(&pool->sp_lock); | ||
| 83 | } | ||
| 84 | |||
| 85 | |||
| 65 | /* Remove any stale portmap registrations */ | 86 | /* Remove any stale portmap registrations */ |
| 66 | svc_register(serv, 0, 0); | 87 | svc_register(serv, 0, 0); |
| 67 | 88 | ||
| @@ -69,7 +90,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, | |||
| 69 | } | 90 | } |
| 70 | 91 | ||
| 71 | /* | 92 | /* |
| 72 | * Destroy an RPC service | 93 | * Destroy an RPC service. Should be called with the BKL held |
| 73 | */ | 94 | */ |
| 74 | void | 95 | void |
| 75 | svc_destroy(struct svc_serv *serv) | 96 | svc_destroy(struct svc_serv *serv) |
| @@ -110,6 +131,7 @@ svc_destroy(struct svc_serv *serv) | |||
| 110 | 131 | ||
| 111 | /* Unregister service with the portmapper */ | 132 | /* Unregister service with the portmapper */ |
| 112 | svc_register(serv, 0, 0); | 133 | svc_register(serv, 0, 0); |
| 134 | kfree(serv->sv_pools); | ||
| 113 | kfree(serv); | 135 | kfree(serv); |
| 114 | } | 136 | } |
| 115 | 137 | ||
| @@ -158,10 +180,11 @@ svc_release_buffer(struct svc_rqst *rqstp) | |||
| 158 | } | 180 | } |
| 159 | 181 | ||
| 160 | /* | 182 | /* |
| 161 | * Create a server thread | 183 | * Create a thread in the given pool. Caller must hold BKL. |
| 162 | */ | 184 | */ |
| 163 | int | 185 | static int |
| 164 | svc_create_thread(svc_thread_fn func, struct svc_serv *serv) | 186 | __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, |
| 187 | struct svc_pool *pool) | ||
| 165 | { | 188 | { |
| 166 | struct svc_rqst *rqstp; | 189 | struct svc_rqst *rqstp; |
| 167 | int error = -ENOMEM; | 190 | int error = -ENOMEM; |
| @@ -178,7 +201,11 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv) | |||
| 178 | goto out_thread; | 201 | goto out_thread; |
| 179 | 202 | ||
| 180 | serv->sv_nrthreads++; | 203 | serv->sv_nrthreads++; |
| 204 | spin_lock_bh(&pool->sp_lock); | ||
| 205 | pool->sp_nrthreads++; | ||
| 206 | spin_unlock_bh(&pool->sp_lock); | ||
| 181 | rqstp->rq_server = serv; | 207 | rqstp->rq_server = serv; |
| 208 | rqstp->rq_pool = pool; | ||
| 182 | error = kernel_thread((int (*)(void *)) func, rqstp, 0); | 209 | error = kernel_thread((int (*)(void *)) func, rqstp, 0); |
| 183 | if (error < 0) | 210 | if (error < 0) |
| 184 | goto out_thread; | 211 | goto out_thread; |
| @@ -193,17 +220,32 @@ out_thread: | |||
| 193 | } | 220 | } |
| 194 | 221 | ||
| 195 | /* | 222 | /* |
| 196 | * Destroy an RPC server thread | 223 | * Create a thread in the default pool. Caller must hold BKL. |
| 224 | */ | ||
| 225 | int | ||
| 226 | svc_create_thread(svc_thread_fn func, struct svc_serv *serv) | ||
| 227 | { | ||
| 228 | return __svc_create_thread(func, serv, &serv->sv_pools[0]); | ||
| 229 | } | ||
| 230 | |||
| 231 | /* | ||
| 232 | * Called from a server thread as it's exiting. Caller must hold BKL. | ||
| 197 | */ | 233 | */ |
| 198 | void | 234 | void |
| 199 | svc_exit_thread(struct svc_rqst *rqstp) | 235 | svc_exit_thread(struct svc_rqst *rqstp) |
| 200 | { | 236 | { |
| 201 | struct svc_serv *serv = rqstp->rq_server; | 237 | struct svc_serv *serv = rqstp->rq_server; |
| 238 | struct svc_pool *pool = rqstp->rq_pool; | ||
| 202 | 239 | ||
| 203 | svc_release_buffer(rqstp); | 240 | svc_release_buffer(rqstp); |
| 204 | kfree(rqstp->rq_resp); | 241 | kfree(rqstp->rq_resp); |
| 205 | kfree(rqstp->rq_argp); | 242 | kfree(rqstp->rq_argp); |
| 206 | kfree(rqstp->rq_auth_data); | 243 | kfree(rqstp->rq_auth_data); |
| 244 | |||
| 245 | spin_lock_bh(&pool->sp_lock); | ||
| 246 | pool->sp_nrthreads--; | ||
| 247 | spin_unlock_bh(&pool->sp_lock); | ||
| 248 | |||
| 207 | kfree(rqstp); | 249 | kfree(rqstp); |
| 208 | 250 | ||
| 209 | /* Release the server */ | 251 | /* Release the server */ |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a38df4589a..b78659adef 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
| @@ -46,7 +46,10 @@ | |||
| 46 | 46 | ||
| 47 | /* SMP locking strategy: | 47 | /* SMP locking strategy: |
| 48 | * | 48 | * |
| 49 | * svc_serv->sv_lock protects most stuff for that service. | 49 | * svc_pool->sp_lock protects most of the fields of that pool. |
| 50 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
| 51 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
| 52 | * BKL protects svc_serv->sv_nrthreads. | |||
| 50 | * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list | 53 | * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list |
| 51 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. | 54 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. |
| 52 | * | 55 | * |
| @@ -82,22 +85,22 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req); | |||
| 82 | static int svc_conn_age_period = 6*60; | 85 | static int svc_conn_age_period = 6*60; |
| 83 | 86 | ||
| 84 | /* | 87 | /* |
| 85 | * Queue up an idle server thread. Must have serv->sv_lock held. | 88 | * Queue up an idle server thread. Must have pool->sp_lock held. |
| 86 | * Note: this is really a stack rather than a queue, so that we only | 89 | * Note: this is really a stack rather than a queue, so that we only |
| 87 | * use as many different threads as we need, and the rest don't polute | 90 | * use as many different threads as we need, and the rest don't pollute |
| 88 | * the cache. | 91 | * the cache. |
| 89 | */ | 92 | */ |
| 90 | static inline void | 93 | static inline void |
| 91 | svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp) | 94 | svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) |
| 92 | { | 95 | { |
| 93 | list_add(&rqstp->rq_list, &serv->sv_threads); | 96 | list_add(&rqstp->rq_list, &pool->sp_threads); |
| 94 | } | 97 | } |
| 95 | 98 | ||
| 96 | /* | 99 | /* |
| 97 | * Dequeue an nfsd thread. Must have serv->sv_lock held. | 100 | * Dequeue an nfsd thread. Must have pool->sp_lock held. |
| 98 | */ | 101 | */ |
| 99 | static inline void | 102 | static inline void |
| 100 | svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp) | 103 | svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) |
| 101 | { | 104 | { |
| 102 | list_del(&rqstp->rq_list); | 105 | list_del(&rqstp->rq_list); |
| 103 | } | 106 | } |
| @@ -148,6 +151,7 @@ static void | |||
| 148 | svc_sock_enqueue(struct svc_sock *svsk) | 151 | svc_sock_enqueue(struct svc_sock *svsk) |
| 149 | { | 152 | { |
| 150 | struct svc_serv *serv = svsk->sk_server; | 153 | struct svc_serv *serv = svsk->sk_server; |
| 154 | struct svc_pool *pool = &serv->sv_pools[0]; | ||
| 151 | struct svc_rqst *rqstp; | 155 | struct svc_rqst *rqstp; |
| 152 | 156 | ||
| 153 | if (!(svsk->sk_flags & | 157 | if (!(svsk->sk_flags & |
| @@ -156,10 +160,10 @@ svc_sock_enqueue(struct svc_sock *svsk) | |||
| 156 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | 160 | if (test_bit(SK_DEAD, &svsk->sk_flags)) |
| 157 | return; | 161 | return; |
| 158 | 162 | ||
| 159 | spin_lock_bh(&serv->sv_lock); | 163 | spin_lock_bh(&pool->sp_lock); |
| 160 | 164 | ||
| 161 | if (!list_empty(&serv->sv_threads) && | 165 | if (!list_empty(&pool->sp_threads) && |
| 162 | !list_empty(&serv->sv_sockets)) | 166 | !list_empty(&pool->sp_sockets)) |
| 163 | printk(KERN_ERR | 167 | printk(KERN_ERR |
| 164 | "svc_sock_enqueue: threads and sockets both waiting??\n"); | 168 | "svc_sock_enqueue: threads and sockets both waiting??\n"); |
| 165 | 169 | ||
| @@ -179,6 +183,8 @@ svc_sock_enqueue(struct svc_sock *svsk) | |||
| 179 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); | 183 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); |
| 180 | goto out_unlock; | 184 | goto out_unlock; |
| 181 | } | 185 | } |
| 186 | BUG_ON(svsk->sk_pool != NULL); | ||
| 187 | svsk->sk_pool = pool; | ||
| 182 | 188 | ||
| 183 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | 189 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); |
| 184 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2 | 190 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2 |
| @@ -189,19 +195,20 @@ svc_sock_enqueue(struct svc_sock *svsk) | |||
| 189 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", | 195 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", |
| 190 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz, | 196 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz, |
| 191 | svc_sock_wspace(svsk)); | 197 | svc_sock_wspace(svsk)); |
| 198 | svsk->sk_pool = NULL; | ||
| 192 | clear_bit(SK_BUSY, &svsk->sk_flags); | 199 | clear_bit(SK_BUSY, &svsk->sk_flags); |
| 193 | goto out_unlock; | 200 | goto out_unlock; |
| 194 | } | 201 | } |
| 195 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | 202 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); |
| 196 | 203 | ||
| 197 | 204 | ||
| 198 | if (!list_empty(&serv->sv_threads)) { | 205 | if (!list_empty(&pool->sp_threads)) { |
| 199 | rqstp = list_entry(serv->sv_threads.next, | 206 | rqstp = list_entry(pool->sp_threads.next, |
| 200 | struct svc_rqst, | 207 | struct svc_rqst, |
| 201 | rq_list); | 208 | rq_list); |
| 202 | dprintk("svc: socket %p served by daemon %p\n", | 209 | dprintk("svc: socket %p served by daemon %p\n", |
| 203 | svsk->sk_sk, rqstp); | 210 | svsk->sk_sk, rqstp); |
| 204 | svc_serv_dequeue(serv, rqstp); | 211 | svc_thread_dequeue(pool, rqstp); |
| 205 | if (rqstp->rq_sock) | 212 | if (rqstp->rq_sock) |
| 206 | printk(KERN_ERR | 213 | printk(KERN_ERR |
| 207 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", | 214 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", |
| @@ -210,28 +217,30 @@ svc_sock_enqueue(struct svc_sock *svsk) | |||
| 210 | atomic_inc(&svsk->sk_inuse); | 217 | atomic_inc(&svsk->sk_inuse); |
| 211 | rqstp->rq_reserved = serv->sv_bufsz; | 218 | rqstp->rq_reserved = serv->sv_bufsz; |
| 212 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | 219 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); |
| 220 | BUG_ON(svsk->sk_pool != pool); | ||
| 213 | wake_up(&rqstp->rq_wait); | 221 | wake_up(&rqstp->rq_wait); |
| 214 | } else { | 222 | } else { |
| 215 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); | 223 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); |
| 216 | list_add_tail(&svsk->sk_ready, &serv->sv_sockets); | 224 | list_add_tail(&svsk->sk_ready, &pool->sp_sockets); |
| 225 | BUG_ON(svsk->sk_pool != pool); | ||
| 217 | } | 226 | } |
| 218 | 227 | ||
| 219 | out_unlock: | 228 | out_unlock: |
| 220 | spin_unlock_bh(&serv->sv_lock); | 229 | spin_unlock_bh(&pool->sp_lock); |
| 221 | } | 230 | } |
| 222 | 231 | ||
| 223 | /* | 232 | /* |
| 224 | * Dequeue the first socket. Must be called with the serv->sv_lock held. | 233 | * Dequeue the first socket. Must be called with the pool->sp_lock held. |
| 225 | */ | 234 | */ |
| 226 | static inline struct svc_sock * | 235 | static inline struct svc_sock * |
| 227 | svc_sock_dequeue(struct svc_serv *serv) | 236 | svc_sock_dequeue(struct svc_pool *pool) |
| 228 | { | 237 | { |
| 229 | struct svc_sock *svsk; | 238 | struct svc_sock *svsk; |
| 230 | 239 | ||
| 231 | if (list_empty(&serv->sv_sockets)) | 240 | if (list_empty(&pool->sp_sockets)) |
| 232 | return NULL; | 241 | return NULL; |
| 233 | 242 | ||
| 234 | svsk = list_entry(serv->sv_sockets.next, | 243 | svsk = list_entry(pool->sp_sockets.next, |
| 235 | struct svc_sock, sk_ready); | 244 | struct svc_sock, sk_ready); |
| 236 | list_del_init(&svsk->sk_ready); | 245 | list_del_init(&svsk->sk_ready); |
| 237 | 246 | ||
| @@ -250,6 +259,7 @@ svc_sock_dequeue(struct svc_serv *serv) | |||
| 250 | static inline void | 259 | static inline void |
| 251 | svc_sock_received(struct svc_sock *svsk) | 260 | svc_sock_received(struct svc_sock *svsk) |
| 252 | { | 261 | { |
| 262 | svsk->sk_pool = NULL; | ||
| 253 | clear_bit(SK_BUSY, &svsk->sk_flags); | 263 | clear_bit(SK_BUSY, &svsk->sk_flags); |
| 254 | svc_sock_enqueue(svsk); | 264 | svc_sock_enqueue(svsk); |
| 255 | } | 265 | } |
| @@ -322,25 +332,33 @@ svc_sock_release(struct svc_rqst *rqstp) | |||
| 322 | 332 | ||
| 323 | /* | 333 | /* |
| 324 | * External function to wake up a server waiting for data | 334 | * External function to wake up a server waiting for data |
| 335 | * This really only makes sense for services like lockd | ||
| 336 | * which have exactly one thread anyway. | ||
| 325 | */ | 337 | */ |
| 326 | void | 338 | void |
| 327 | svc_wake_up(struct svc_serv *serv) | 339 | svc_wake_up(struct svc_serv *serv) |
| 328 | { | 340 | { |
| 329 | struct svc_rqst *rqstp; | 341 | struct svc_rqst *rqstp; |
| 330 | 342 | unsigned int i; | |
| 331 | spin_lock_bh(&serv->sv_lock); | 343 | struct svc_pool *pool; |
| 332 | if (!list_empty(&serv->sv_threads)) { | 344 | |
| 333 | rqstp = list_entry(serv->sv_threads.next, | 345 | for (i = 0; i < serv->sv_nrpools; i++) { |
| 334 | struct svc_rqst, | 346 | pool = &serv->sv_pools[i]; |
| 335 | rq_list); | 347 | |
| 336 | dprintk("svc: daemon %p woken up.\n", rqstp); | 348 | spin_lock_bh(&pool->sp_lock); |
| 337 | /* | 349 | if (!list_empty(&pool->sp_threads)) { |
| 338 | svc_serv_dequeue(serv, rqstp); | 350 | rqstp = list_entry(pool->sp_threads.next, |
| 339 | rqstp->rq_sock = NULL; | 351 | struct svc_rqst, |
| 340 | */ | 352 | rq_list); |
| 341 | wake_up(&rqstp->rq_wait); | 353 | dprintk("svc: daemon %p woken up.\n", rqstp); |
| 354 | /* | ||
| 355 | svc_thread_dequeue(pool, rqstp); | ||
| 356 | rqstp->rq_sock = NULL; | ||
| 357 | */ | ||
| 358 | wake_up(&rqstp->rq_wait); | ||
| 359 | } | ||
| 360 | spin_unlock_bh(&pool->sp_lock); | ||
| 342 | } | 361 | } |
| 343 | spin_unlock_bh(&serv->sv_lock); | ||
| 344 | } | 362 | } |
| 345 | 363 | ||
| 346 | /* | 364 | /* |
| @@ -603,7 +621,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
| 603 | /* udp sockets need large rcvbuf as all pending | 621 | /* udp sockets need large rcvbuf as all pending |
| 604 | * requests are still in that buffer. sndbuf must | 622 | * requests are still in that buffer. sndbuf must |
| 605 | * also be large enough that there is enough space | 623 | * also be large enough that there is enough space |
| 606 | * for one reply per thread. | 624 | * for one reply per thread. We count all threads |
| 625 | * rather than threads in a particular pool, which | ||
| 626 | * provides an upper bound on the number of threads | ||
| 627 | * which will access the socket. | ||
| 607 | */ | 628 | */ |
| 608 | svc_sock_setbufsize(svsk->sk_sock, | 629 | svc_sock_setbufsize(svsk->sk_sock, |
| 609 | (serv->sv_nrthreads+3) * serv->sv_bufsz, | 630 | (serv->sv_nrthreads+3) * serv->sv_bufsz, |
| @@ -948,6 +969,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
| 948 | /* sndbuf needs to have room for one request | 969 | /* sndbuf needs to have room for one request |
| 949 | * per thread, otherwise we can stall even when the | 970 | * per thread, otherwise we can stall even when the |
| 950 | * network isn't a bottleneck. | 971 | * network isn't a bottleneck. |
| 972 | * | ||
| 973 | * We count all threads rather than threads in a | ||
| 974 | * particular pool, which provides an upper bound | ||
| 975 | * on the number of threads which will access the socket. | ||
| 976 | * | ||
| 951 | * rcvbuf just needs to be able to hold a few requests. | 977 | * rcvbuf just needs to be able to hold a few requests. |
| 952 | * Normally they will be removed from the queue | 978 | * Normally they will be removed from the queue |
| 953 | * as soon as a complete request arrives. | 979 | * as soon as a complete request arrives. |
| @@ -1163,13 +1189,16 @@ svc_sock_update_bufs(struct svc_serv *serv) | |||
| 1163 | } | 1189 | } |
| 1164 | 1190 | ||
| 1165 | /* | 1191 | /* |
| 1166 | * Receive the next request on any socket. | 1192 | * Receive the next request on any socket. This code is carefully |
| 1193 | * organised not to touch any cachelines in the shared svc_serv | ||
| 1194 | * structure, only cachelines in the local svc_pool. | ||
| 1167 | */ | 1195 | */ |
| 1168 | int | 1196 | int |
| 1169 | svc_recv(struct svc_rqst *rqstp, long timeout) | 1197 | svc_recv(struct svc_rqst *rqstp, long timeout) |
| 1170 | { | 1198 | { |
| 1171 | struct svc_sock *svsk =NULL; | 1199 | struct svc_sock *svsk =NULL; |
| 1172 | struct svc_serv *serv = rqstp->rq_server; | 1200 | struct svc_serv *serv = rqstp->rq_server; |
| 1201 | struct svc_pool *pool = rqstp->rq_pool; | ||
| 1173 | int len; | 1202 | int len; |
| 1174 | int pages; | 1203 | int pages; |
| 1175 | struct xdr_buf *arg; | 1204 | struct xdr_buf *arg; |
| @@ -1219,15 +1248,15 @@ svc_recv(struct svc_rqst *rqstp, long timeout) | |||
| 1219 | if (signalled()) | 1248 | if (signalled()) |
| 1220 | return -EINTR; | 1249 | return -EINTR; |
| 1221 | 1250 | ||
| 1222 | spin_lock_bh(&serv->sv_lock); | 1251 | spin_lock_bh(&pool->sp_lock); |
| 1223 | if ((svsk = svc_sock_dequeue(serv)) != NULL) { | 1252 | if ((svsk = svc_sock_dequeue(pool)) != NULL) { |
| 1224 | rqstp->rq_sock = svsk; | 1253 | rqstp->rq_sock = svsk; |
| 1225 | atomic_inc(&svsk->sk_inuse); | 1254 | atomic_inc(&svsk->sk_inuse); |
| 1226 | rqstp->rq_reserved = serv->sv_bufsz; | 1255 | rqstp->rq_reserved = serv->sv_bufsz; |
| 1227 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | 1256 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); |
| 1228 | } else { | 1257 | } else { |
| 1229 | /* No data pending. Go to sleep */ | 1258 | /* No data pending. Go to sleep */ |
| 1230 | svc_serv_enqueue(serv, rqstp); | 1259 | svc_thread_enqueue(pool, rqstp); |
| 1231 | 1260 | ||
| 1232 | /* | 1261 | /* |
| 1233 | * We have to be able to interrupt this wait | 1262 | * We have to be able to interrupt this wait |
| @@ -1235,26 +1264,26 @@ svc_recv(struct svc_rqst *rqstp, long timeout) | |||
| 1235 | */ | 1264 | */ |
| 1236 | set_current_state(TASK_INTERRUPTIBLE); | 1265 | set_current_state(TASK_INTERRUPTIBLE); |
| 1237 | add_wait_queue(&rqstp->rq_wait, &wait); | 1266 | add_wait_queue(&rqstp->rq_wait, &wait); |
| 1238 | spin_unlock_bh(&serv->sv_lock); | 1267 | spin_unlock_bh(&pool->sp_lock); |
| 1239 | 1268 | ||
| 1240 | schedule_timeout(timeout); | 1269 | schedule_timeout(timeout); |
| 1241 | 1270 | ||
| 1242 | try_to_freeze(); | 1271 | try_to_freeze(); |
| 1243 | 1272 | ||
| 1244 | spin_lock_bh(&serv->sv_lock); | 1273 | spin_lock_bh(&pool->sp_lock); |
| 1245 | remove_wait_queue(&rqstp->rq_wait, &wait); | 1274 | remove_wait_queue(&rqstp->rq_wait, &wait); |
| 1246 | 1275 | ||
| 1247 | if (!(svsk = rqstp->rq_sock)) { | 1276 | if (!(svsk = rqstp->rq_sock)) { |
| 1248 | svc_serv_dequeue(serv, rqstp); | 1277 | svc_thread_dequeue(pool, rqstp); |
| 1249 | spin_unlock_bh(&serv->sv_lock); | 1278 | spin_unlock_bh(&pool->sp_lock); |
| 1250 | dprintk("svc: server %p, no data yet\n", rqstp); | 1279 | dprintk("svc: server %p, no data yet\n", rqstp); |
| 1251 | return signalled()? -EINTR : -EAGAIN; | 1280 | return signalled()? -EINTR : -EAGAIN; |
| 1252 | } | 1281 | } |
| 1253 | } | 1282 | } |
| 1254 | spin_unlock_bh(&serv->sv_lock); | 1283 | spin_unlock_bh(&pool->sp_lock); |
| 1255 | 1284 | ||
| 1256 | dprintk("svc: server %p, socket %p, inuse=%d\n", | 1285 | dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", |
| 1257 | rqstp, svsk, atomic_read(&svsk->sk_inuse)); | 1286 | rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); |
| 1258 | len = svsk->sk_recvfrom(rqstp); | 1287 | len = svsk->sk_recvfrom(rqstp); |
| 1259 | dprintk("svc: got len=%d\n", len); | 1288 | dprintk("svc: got len=%d\n", len); |
| 1260 | 1289 | ||
| @@ -1553,7 +1582,13 @@ svc_delete_socket(struct svc_sock *svsk) | |||
| 1553 | 1582 | ||
| 1554 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) | 1583 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) |
| 1555 | list_del_init(&svsk->sk_list); | 1584 | list_del_init(&svsk->sk_list); |
| 1556 | list_del_init(&svsk->sk_ready); | 1585 | /* |
| 1586 | * We used to delete the svc_sock from whichever list | ||
| 1587 | * its sk_ready node was on, but we don't actually | |||
| 1588 | * need to. This is because the only time we're called | ||
| 1589 | * while still attached to a queue, the queue itself | ||
| 1590 | * is about to be destroyed (in svc_destroy). | ||
| 1591 | */ | ||
| 1557 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) | 1592 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) |
| 1558 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | 1593 | if (test_bit(SK_TEMP, &svsk->sk_flags)) |
| 1559 | serv->sv_tmpcnt--; | 1594 | serv->sv_tmpcnt--; |
