author     Greg Banks <gnb@melbourne.sgi.com>      2006-10-02 05:17:58 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-10-02 10:57:19 -0400
commit     3262c816a3d7fb1eaabce633caa317887ed549ae
tree       5b635d8b62b9724ab2b1e5563aad37e35b894406
parent     c081a0c7cfe42adf8e8b9c2b8d0b2ec7f47603e8
[PATCH] knfsd: split svc_serv into pools
Split out the list of idle threads and pending sockets from svc_serv into a
new svc_pool structure, and allocate a fixed number (in this patch, 1) of
pools per svc_serv. The new structure contains a lock which takes over
several of the duties of svc_serv->sv_lock, which is now relegated to
protecting only sv_tempsocks, sv_permsocks, and sv_tmpcnt in svc_serv.
The point is to move the hottest fields out of svc_serv and into svc_pool,
allowing a following patch to arrange for a svc_pool per NUMA node or per CPU.
This is a major step towards making the NFS server NUMA-friendly.
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
 include/linux/sunrpc/svc.h     |  25
 include/linux/sunrpc/svcsock.h |   1
 net/sunrpc/svc.c               |  56
 net/sunrpc/svcsock.c           | 125
 4 files changed, 153 insertions(+), 54 deletions(-)
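The patch fixes sv_nrpools at 1, so behaviour is unchanged; the interesting part is the shape it sets up. A minimal sketch of how the promised follow-up might pick a pool per NUMA node (the helper name and the modulo policy are assumptions for illustration, not part of this commit):

/*
 * Hypothetical sketch: with sv_nrpools == 1, as in this patch,
 * every lookup collapses to &serv->sv_pools[0].  A later patch
 * could size sv_pools by node count and select like this.
 */
static inline struct svc_pool *
svc_pool_for_node(struct svc_serv *serv, int node)
{
        if (serv->sv_nrpools == 1)      /* the only case in this patch */
                return &serv->sv_pools[0];
        return &serv->sv_pools[node % serv->sv_nrpools];
}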
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5eabded11061..c27d806af310 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -17,6 +17,25 @@
 #include <linux/wait.h>
 #include <linux/mm.h>
 
+
+/*
+ *
+ * RPC service thread pool.
+ *
+ * Pool of threads and temporary sockets.  Generally there is only
+ * a single one of these per RPC service, but on NUMA machines those
+ * services that can benefit from it (i.e. nfs but not lockd) will
+ * have one pool per NUMA node.  This optimisation reduces cross-
+ * node traffic on multi-node NUMA NFS servers.
+ */
+struct svc_pool {
+        unsigned int            sp_id;          /* pool id; also node id on NUMA */
+        spinlock_t              sp_lock;        /* protects all fields */
+        struct list_head        sp_threads;     /* idle server threads */
+        struct list_head        sp_sockets;     /* pending sockets */
+        unsigned int            sp_nrthreads;   /* # of threads in pool */
+} ____cacheline_aligned_in_smp;
+
 /*
  * RPC service.
  *
@@ -28,8 +47,6 @@
  * We currently do not support more than one RPC program per daemon.
  */
 struct svc_serv {
-        struct list_head        sv_threads;     /* idle server threads */
-        struct list_head        sv_sockets;     /* pending sockets */
         struct svc_program *    sv_program;     /* RPC program */
         struct svc_stat *       sv_stats;       /* RPC statistics */
         spinlock_t              sv_lock;
@@ -44,6 +61,9 @@ struct svc_serv {
 
         char *                  sv_name;        /* service name */
 
+        unsigned int            sv_nrpools;     /* number of thread pools */
+        struct svc_pool *       sv_pools;       /* array of thread pools */
+
         void                    (*sv_shutdown)(struct svc_serv *serv);
                                         /* Callback to use when last thread
                                          * exits.
@@ -138,6 +158,7 @@ struct svc_rqst {
         int                     rq_addrlen;
 
         struct svc_serv *       rq_server;      /* RPC service definition */
+        struct svc_pool *       rq_pool;        /* thread pool */
         struct svc_procedure *  rq_procinfo;    /* procedure info */
         struct auth_ops *       rq_authop;      /* authentication flavour */
         struct svc_cred         rq_cred;        /* auth info */
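The new rq_pool back-pointer is what makes the per-pool locality work: a thread reaches its own pool directly, and since svc_pool is ____cacheline_aligned_in_smp, pools do not share cachelines. A sketch of the resulting access pattern, mirroring what svc_recv() does further down (the function itself is illustrative, not from the patch):

/* Illustrative only: per-pool state is reached via rq_pool, so the
 * hot path never touches the shared svc_serv cachelines. */
static unsigned int
example_pool_nrthreads(struct svc_rqst *rqstp)
{
        struct svc_pool *pool = rqstp->rq_pool;
        unsigned int n;

        spin_lock_bh(&pool->sp_lock);   /* sp_lock protects all pool fields */
        n = pool->sp_nrthreads;
        spin_unlock_bh(&pool->sp_lock);
        return n;
}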
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 7154e71c6d1f..4c296152cbfa 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -20,6 +20,7 @@ struct svc_sock {
         struct socket *         sk_sock;        /* berkeley socket layer */
         struct sock *           sk_sk;          /* INET layer */
 
+        struct svc_pool *       sk_pool;        /* current pool iff queued */
         struct svc_serv *       sk_server;      /* service for this socket */
         atomic_t                sk_inuse;       /* use count */
         unsigned long           sk_flags;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 0c2c52276285..6750cd474f84 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -32,6 +32,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
         struct svc_serv *serv;
         int vers;
         unsigned int xdrsize;
+        unsigned int i;
 
         if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
                 return NULL;
@@ -55,13 +56,33 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
                 prog = prog->pg_next;
         }
         serv->sv_xdrsize   = xdrsize;
-        INIT_LIST_HEAD(&serv->sv_threads);
-        INIT_LIST_HEAD(&serv->sv_sockets);
         INIT_LIST_HEAD(&serv->sv_tempsocks);
         INIT_LIST_HEAD(&serv->sv_permsocks);
         init_timer(&serv->sv_temptimer);
         spin_lock_init(&serv->sv_lock);
 
+        serv->sv_nrpools = 1;
+        serv->sv_pools =
+                kcalloc(sizeof(struct svc_pool), serv->sv_nrpools,
+                        GFP_KERNEL);
+        if (!serv->sv_pools) {
+                kfree(serv);
+                return NULL;
+        }
+
+        for (i = 0; i < serv->sv_nrpools; i++) {
+                struct svc_pool *pool = &serv->sv_pools[i];
+
+                dprintk("initialising pool %u for %s\n",
+                                i, serv->sv_name);
+
+                pool->sp_id = i;
+                INIT_LIST_HEAD(&pool->sp_threads);
+                INIT_LIST_HEAD(&pool->sp_sockets);
+                spin_lock_init(&pool->sp_lock);
+        }
+
+
         /* Remove any stale portmap registrations */
         svc_register(serv, 0, 0);
 
@@ -69,7 +90,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
 }
 
 /*
- * Destroy an RPC service
+ * Destroy an RPC service.  Should be called with the BKL held
  */
 void
 svc_destroy(struct svc_serv *serv)
@@ -110,6 +131,7 @@ svc_destroy(struct svc_serv *serv)
 
         /* Unregister service with the portmapper */
         svc_register(serv, 0, 0);
+        kfree(serv->sv_pools);
         kfree(serv);
 }
 
@@ -158,10 +180,11 @@ svc_release_buffer(struct svc_rqst *rqstp)
 }
 
 /*
- * Create a server thread
+ * Create a thread in the given pool.  Caller must hold BKL.
  */
-int
-svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
+static int
+__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
+                    struct svc_pool *pool)
 {
         struct svc_rqst *rqstp;
         int             error = -ENOMEM;
@@ -178,7 +201,11 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
                 goto out_thread;
 
         serv->sv_nrthreads++;
+        spin_lock_bh(&pool->sp_lock);
+        pool->sp_nrthreads++;
+        spin_unlock_bh(&pool->sp_lock);
         rqstp->rq_server = serv;
+        rqstp->rq_pool = pool;
         error = kernel_thread((int (*)(void *)) func, rqstp, 0);
         if (error < 0)
                 goto out_thread;
@@ -193,17 +220,32 @@ out_thread:
 }
 
 /*
- * Destroy an RPC server thread
+ * Create a thread in the default pool.  Caller must hold BKL.
+ */
+int
+svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
+{
+        return __svc_create_thread(func, serv, &serv->sv_pools[0]);
+}
+
+/*
+ * Called from a server thread as it's exiting.  Caller must hold BKL.
  */
 void
 svc_exit_thread(struct svc_rqst *rqstp)
 {
         struct svc_serv *serv = rqstp->rq_server;
+        struct svc_pool *pool = rqstp->rq_pool;
 
         svc_release_buffer(rqstp);
         kfree(rqstp->rq_resp);
         kfree(rqstp->rq_argp);
         kfree(rqstp->rq_auth_data);
+
+        spin_lock_bh(&pool->sp_lock);
+        pool->sp_nrthreads--;
+        spin_unlock_bh(&pool->sp_lock);
+
         kfree(rqstp);
 
         /* Release the server */
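The first svcsock.c hunk below documents the split lock hierarchy. For the rare case where both locks are needed, the stated rule is sv_lock first, then sp_lock; a sketch of that ordering (no call site in this patch actually takes both, so this only restates the documented convention):

/* Hypothetical: the documented ordering when both locks are held. */
static void
example_take_both(struct svc_serv *serv, struct svc_pool *pool)
{
        spin_lock_bh(&serv->sv_lock);   /* outer: sv_tempsocks, sv_permsocks, sv_tmpcnt */
        spin_lock_bh(&pool->sp_lock);   /* inner: per-pool lists and counts */
        /* ... work spanning serv-global and pool-local state ... */
        spin_unlock_bh(&pool->sp_lock);
        spin_unlock_bh(&serv->sv_lock);
}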
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a38df4589ae9..b78659adeff3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -46,7 +46,10 @@
 
 /* SMP locking strategy:
  *
- *      svc_serv->sv_lock protects most stuff for that service.
+ *      svc_pool->sp_lock protects most of the fields of that pool.
+ *      svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
+ *      when both need to be taken (rare), svc_serv->sv_lock is first.
+ *      BKL protects svc_serv->sv_nrthread.
  *      svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list
  *      svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
  *
@@ -82,22 +85,22 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static int svc_conn_age_period = 6*60;
 
 /*
- * Queue up an idle server thread.  Must have serv->sv_lock held.
+ * Queue up an idle server thread.  Must have pool->sp_lock held.
  * Note: this is really a stack rather than a queue, so that we only
- * use as many different threads as we need, and the rest don't polute
+ * use as many different threads as we need, and the rest don't pollute
  * the cache.
  */
 static inline void
-svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
 {
-        list_add(&rqstp->rq_list, &serv->sv_threads);
+        list_add(&rqstp->rq_list, &pool->sp_threads);
 }
 
 /*
- * Dequeue an nfsd thread.  Must have serv->sv_lock held.
+ * Dequeue an nfsd thread.  Must have pool->sp_lock held.
  */
 static inline void
-svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 {
         list_del(&rqstp->rq_list);
 }
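The comment above survives the rename: sp_threads is deliberately a stack, not a queue. svc_thread_enqueue() pushes at the head with list_add(), and svc_sock_enqueue() below takes sp_threads.next, so the most recently idle thread, the one with the warmest cache, runs next. For contrast, a hypothetical FIFO variant would push at the tail (this sketch is illustrative only, not from the patch):

/* Illustrative contrast: the push discipline decides which
 * thread wakes next. */
static inline void
example_park(struct svc_pool *pool, struct svc_rqst *rqstp, int fifo)
{
        if (fifo)
                list_add_tail(&rqstp->rq_list, &pool->sp_threads);  /* rotate all threads */
        else
                list_add(&rqstp->rq_list, &pool->sp_threads);       /* LIFO: patch behaviour */
}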
@@ -148,6 +151,7 @@ static void
 svc_sock_enqueue(struct svc_sock *svsk)
 {
         struct svc_serv *serv = svsk->sk_server;
+        struct svc_pool *pool = &serv->sv_pools[0];
         struct svc_rqst *rqstp;
 
         if (!(svsk->sk_flags &
@@ -156,10 +160,10 @@ svc_sock_enqueue(struct svc_sock *svsk)
         if (test_bit(SK_DEAD, &svsk->sk_flags))
                 return;
 
-        spin_lock_bh(&serv->sv_lock);
+        spin_lock_bh(&pool->sp_lock);
 
-        if (!list_empty(&serv->sv_threads) &&
-            !list_empty(&serv->sv_sockets))
+        if (!list_empty(&pool->sp_threads) &&
+            !list_empty(&pool->sp_sockets))
                 printk(KERN_ERR
                         "svc_sock_enqueue: threads and sockets both waiting??\n");
 
@@ -179,6 +183,8 @@ svc_sock_enqueue(struct svc_sock *svsk)
                 dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
                 goto out_unlock;
         }
+        BUG_ON(svsk->sk_pool != NULL);
+        svsk->sk_pool = pool;
 
         set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
         if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2
@@ -189,19 +195,20 @@ svc_sock_enqueue(struct svc_sock *svsk)
                 dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
                         svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz,
                         svc_sock_wspace(svsk));
+                svsk->sk_pool = NULL;
                 clear_bit(SK_BUSY, &svsk->sk_flags);
                 goto out_unlock;
         }
         clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 
 
-        if (!list_empty(&serv->sv_threads)) {
-                rqstp = list_entry(serv->sv_threads.next,
+        if (!list_empty(&pool->sp_threads)) {
+                rqstp = list_entry(pool->sp_threads.next,
                                    struct svc_rqst,
                                    rq_list);
                 dprintk("svc: socket %p served by daemon %p\n",
                         svsk->sk_sk, rqstp);
-                svc_serv_dequeue(serv, rqstp);
+                svc_thread_dequeue(pool, rqstp);
                 if (rqstp->rq_sock)
                         printk(KERN_ERR
                                 "svc_sock_enqueue: server %p, rq_sock=%p!\n",
@@ -210,28 +217,30 @@ svc_sock_enqueue(struct svc_sock *svsk)
                 atomic_inc(&svsk->sk_inuse);
                 rqstp->rq_reserved = serv->sv_bufsz;
                 atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
+                BUG_ON(svsk->sk_pool != pool);
                 wake_up(&rqstp->rq_wait);
         } else {
                 dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
-                list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
+                list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
+                BUG_ON(svsk->sk_pool != pool);
         }
 
 out_unlock:
-        spin_unlock_bh(&serv->sv_lock);
+        spin_unlock_bh(&pool->sp_lock);
 }
 
 /*
- * Dequeue the first socket.  Must be called with the serv->sv_lock held.
+ * Dequeue the first socket.  Must be called with the pool->sp_lock held.
  */
 static inline struct svc_sock *
-svc_sock_dequeue(struct svc_serv *serv)
+svc_sock_dequeue(struct svc_pool *pool)
 {
         struct svc_sock *svsk;
 
-        if (list_empty(&serv->sv_sockets))
+        if (list_empty(&pool->sp_sockets))
                 return NULL;
 
-        svsk = list_entry(serv->sv_sockets.next,
+        svsk = list_entry(pool->sp_sockets.next,
                           struct svc_sock, sk_ready);
         list_del_init(&svsk->sk_ready);
 
@@ -250,6 +259,7 @@ svc_sock_dequeue(struct svc_serv *serv)
 static inline void
 svc_sock_received(struct svc_sock *svsk)
 {
+        svsk->sk_pool = NULL;
         clear_bit(SK_BUSY, &svsk->sk_flags);
         svc_sock_enqueue(svsk);
 }
@@ -322,25 +332,33 @@ svc_sock_release(struct svc_rqst *rqstp)
 
 /*
  * External function to wake up a server waiting for data
+ * This really only makes sense for services like lockd
+ * which have exactly one thread anyway.
  */
 void
 svc_wake_up(struct svc_serv *serv)
 {
         struct svc_rqst *rqstp;
-
-        spin_lock_bh(&serv->sv_lock);
-        if (!list_empty(&serv->sv_threads)) {
-                rqstp = list_entry(serv->sv_threads.next,
-                                   struct svc_rqst,
-                                   rq_list);
-                dprintk("svc: daemon %p woken up.\n", rqstp);
-                /*
-                svc_serv_dequeue(serv, rqstp);
-                rqstp->rq_sock = NULL;
-                 */
-                wake_up(&rqstp->rq_wait);
+        unsigned int i;
+        struct svc_pool *pool;
+
+        for (i = 0; i < serv->sv_nrpools; i++) {
+                pool = &serv->sv_pools[i];
+
+                spin_lock_bh(&pool->sp_lock);
+                if (!list_empty(&pool->sp_threads)) {
+                        rqstp = list_entry(pool->sp_threads.next,
+                                           struct svc_rqst,
+                                           rq_list);
+                        dprintk("svc: daemon %p woken up.\n", rqstp);
+                        /*
+                        svc_thread_dequeue(pool, rqstp);
+                        rqstp->rq_sock = NULL;
+                         */
+                        wake_up(&rqstp->rq_wait);
+                }
+                spin_unlock_bh(&pool->sp_lock);
         }
-        spin_unlock_bh(&serv->sv_lock);
 }
 
 /*
@@ -603,7 +621,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
                 /* udp sockets need large rcvbuf as all pending
                  * requests are still in that buffer.  sndbuf must
                  * also be large enough that there is enough space
-                 * for one reply per thread.
+                 * for one reply per thread.  We count all threads
+                 * rather than threads in a particular pool, which
+                 * provides an upper bound on the number of threads
+                 * which will access the socket.
                  */
                 svc_sock_setbufsize(svsk->sk_sock,
                                     (serv->sv_nrthreads+3) * serv->sv_bufsz,
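Both this hunk and the tcp one below size socket buffers from the server-wide thread count: any thread in any pool may end up servicing the socket, so sv_nrthreads is a safe upper bound, even though it can overshoot what a single pool's threads actually need. A worked example with assumed figures (not taken from the patch):

/* Assumed figures, for illustration only. */
unsigned int nrthreads = 8;             /* serv->sv_nrthreads, across all pools */
unsigned int bufsz     = 32 * 1024;     /* serv->sv_bufsz */
unsigned int sndbuf    = (nrthreads + 3) * bufsz;   /* = 360448 bytes */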
@@ -948,6 +969,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
                 /* sndbuf needs to have room for one request
                  * per thread, otherwise we can stall even when the
                  * network isn't a bottleneck.
+                 *
+                 * We count all threads rather than threads in a
+                 * particular pool, which provides an upper bound
+                 * on the number of threads which will access the socket.
+                 *
                  * rcvbuf just needs to be able to hold a few requests.
                  * Normally they will be removed from the queue
                  * as soon a a complete request arrives.
@@ -1163,13 +1189,16 @@ svc_sock_update_bufs(struct svc_serv *serv)
 }
 
 /*
- * Receive the next request on any socket.
+ * Receive the next request on any socket.  This code is carefully
+ * organised not to touch any cachelines in the shared svc_serv
+ * structure, only cachelines in the local svc_pool.
  */
 int
 svc_recv(struct svc_rqst *rqstp, long timeout)
 {
         struct svc_sock         *svsk =NULL;
         struct svc_serv         *serv = rqstp->rq_server;
+        struct svc_pool         *pool = rqstp->rq_pool;
         int                     len;
         int                     pages;
         struct xdr_buf          *arg;
@@ -1219,15 +1248,15 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
         if (signalled())
                 return -EINTR;
 
-        spin_lock_bh(&serv->sv_lock);
-        if ((svsk = svc_sock_dequeue(serv)) != NULL) {
+        spin_lock_bh(&pool->sp_lock);
+        if ((svsk = svc_sock_dequeue(pool)) != NULL) {
                 rqstp->rq_sock = svsk;
                 atomic_inc(&svsk->sk_inuse);
                 rqstp->rq_reserved = serv->sv_bufsz;
                 atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
         } else {
                 /* No data pending. Go to sleep */
-                svc_serv_enqueue(serv, rqstp);
+                svc_thread_enqueue(pool, rqstp);
 
                 /*
                  * We have to be able to interrupt this wait
@@ -1235,26 +1264,26 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
                  */
                 set_current_state(TASK_INTERRUPTIBLE);
                 add_wait_queue(&rqstp->rq_wait, &wait);
-                spin_unlock_bh(&serv->sv_lock);
+                spin_unlock_bh(&pool->sp_lock);
 
                 schedule_timeout(timeout);
 
                 try_to_freeze();
 
-                spin_lock_bh(&serv->sv_lock);
+                spin_lock_bh(&pool->sp_lock);
                 remove_wait_queue(&rqstp->rq_wait, &wait);
 
                 if (!(svsk = rqstp->rq_sock)) {
-                        svc_serv_dequeue(serv, rqstp);
-                        spin_unlock_bh(&serv->sv_lock);
+                        svc_thread_dequeue(pool, rqstp);
+                        spin_unlock_bh(&pool->sp_lock);
                         dprintk("svc: server %p, no data yet\n", rqstp);
                         return signalled()? -EINTR : -EAGAIN;
                 }
         }
-        spin_unlock_bh(&serv->sv_lock);
+        spin_unlock_bh(&pool->sp_lock);
 
-        dprintk("svc: server %p, socket %p, inuse=%d\n",
-                rqstp, svsk, atomic_read(&svsk->sk_inuse));
+        dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
+                rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
         len = svsk->sk_recvfrom(rqstp);
         dprintk("svc: got len=%d\n", len);
 
@@ -1553,7 +1582,13 @@ svc_delete_socket(struct svc_sock *svsk)
 
         if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
                 list_del_init(&svsk->sk_list);
-        list_del_init(&svsk->sk_ready);
+        /*
+         * We used to delete the svc_sock from whichever list
+         * it's sk_ready node was on, but we don't actually
+         * need to.  This is because the only time we're called
+         * while still attached to a queue, the queue itself
+         * is about to be destroyed (in svc_destroy).
+         */
         if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
                 if (test_bit(SK_TEMP, &svsk->sk_flags))
                         serv->sv_tmpcnt--;