-rw-r--r--  include/linux/sunrpc/svc.h  |   1
-rw-r--r--  net/sunrpc/svc.c            | 255
-rw-r--r--  net/sunrpc/svcsock.c        |   7
3 files changed, 261 insertions(+), 2 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f2eeb833e7d8..4ebcdf91f3b3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -386,5 +386,6 @@ int svc_process(struct svc_rqst *);
 int    svc_register(struct svc_serv *, int, unsigned short);
 void   svc_wake_up(struct svc_serv *);
 void   svc_reserve(struct svc_rqst *rqstp, int space);
+struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 
 #endif /* SUNRPC_SVC_H */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 8c75eec4fd6a..a99e67b164c1 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -4,6 +4,10 @@
  * High-level RPC service routines
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ *
+ * Multiple thread pools and NUMAisation
+ * Copyright (c) 2006 Silicon Graphics, Inc.
+ * by Greg Banks <gnb@melbourne.sgi.com>
  */
 
 #include <linux/linkage.h>
@@ -25,6 +29,242 @@
 #define RPC_PARANOIA 1
 
 /*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+        SVC_POOL_NONE = -1,     /* uninitialised, choose one of the others */
+        SVC_POOL_GLOBAL,        /* no mapping, just a single global pool
+                                 * (legacy & UP mode) */
+        SVC_POOL_PERCPU,        /* one pool per cpu */
+        SVC_POOL_PERNODE        /* one pool per numa node */
+};
+
+/*
+ * Structure for mapping cpus to pools and vice versa.
+ * Set up once during sunrpc initialisation.
+ */
+static struct svc_pool_map {
+        int mode;                       /* Note: int not enum to avoid
+                                         * warnings about "enumeration value
+                                         * not handled in switch" */
+        unsigned int npools;
+        unsigned int *pool_to;          /* maps pool id to cpu or node */
+        unsigned int *to_pool;          /* maps cpu or node to pool id */
+} svc_pool_map = {
+        .mode = SVC_POOL_NONE
+};
+
+
+/*
+ * Detect best pool mapping mode heuristically,
+ * according to the machine's topology.
+ */
+static int
+svc_pool_map_choose_mode(void)
+{
+        unsigned int node;
+
+        if (num_online_nodes() > 1) {
+                /*
+                 * Actually have multiple NUMA nodes,
+                 * so split pools on NUMA node boundaries
+                 */
+                return SVC_POOL_PERNODE;
+        }
+
+        node = any_online_node(node_online_map);
+        if (nr_cpus_node(node) > 2) {
+                /*
+                 * Non-trivial SMP, or CONFIG_NUMA on
+                 * non-NUMA hardware, e.g. with a generic
+                 * x86_64 kernel on Xeons.  In this case we
+                 * want to divide the pools on cpu boundaries.
+                 */
+                return SVC_POOL_PERCPU;
+        }
+
+        /* default: one global pool */
+        return SVC_POOL_GLOBAL;
+}
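[Annotation: the heuristic reduces to a three-way decision on machine topology. Below is a minimal userspace model of the same decision tree, for illustration only; choose_mode() and its inputs are hypothetical stand-ins for num_online_nodes() and nr_cpus_node(), which exist only in the kernel.]

/* Userspace model of the svc_pool_map_choose_mode() decision tree. */
#include <stdio.h>

enum { POOL_GLOBAL, POOL_PERCPU, POOL_PERNODE };

static int choose_mode(int online_nodes, int cpus_on_node0)
{
        if (online_nodes > 1)
                return POOL_PERNODE;    /* real NUMA: split on node boundaries */
        if (cpus_on_node0 > 2)
                return POOL_PERCPU;     /* non-trivial SMP: split on cpu boundaries */
        return POOL_GLOBAL;             /* UP or 2-way: one pool is enough */
}

int main(void)
{
        printf("%d\n", choose_mode(4, 2));  /* 4-node NUMA box  -> 2 (PERNODE) */
        printf("%d\n", choose_mode(1, 8));  /* 8-way single node -> 1 (PERCPU) */
        printf("%d\n", choose_mode(1, 2));  /* 2-way SMP         -> 0 (GLOBAL) */
        return 0;
}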
+
+/*
+ * Allocate the to_pool[] and pool_to[] arrays.
+ * Returns 0 on success or an errno.
+ */
+static int
+svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
+{
+        m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
+        if (!m->to_pool)
+                goto fail;
+        m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
+        if (!m->pool_to)
+                goto fail_free;
+
+        return 0;
+
+fail_free:
+        kfree(m->to_pool);
+fail:
+        return -ENOMEM;
+}
+
+/*
+ * Initialise the pool map for SVC_POOL_PERCPU mode.
+ * Returns number of pools or <0 on error.
+ */
+static int
+svc_pool_map_init_percpu(struct svc_pool_map *m)
+{
+        unsigned int maxpools = highest_possible_processor_id() + 1;
+        unsigned int pidx = 0;
+        unsigned int cpu;
+        int err;
+
+        err = svc_pool_map_alloc_arrays(m, maxpools);
+        if (err)
+                return err;
+
+        for_each_online_cpu(cpu) {
+                BUG_ON(pidx >= maxpools);
+                m->to_pool[cpu] = pidx;
+                m->pool_to[pidx] = cpu;
+                pidx++;
+        }
+        /* cpus brought online later all get mapped to pool0, sorry */
+
+        return pidx;
+}
+
+
+/*
+ * Initialise the pool map for SVC_POOL_PERNODE mode.
+ * Returns number of pools or <0 on error.
+ */
+static int
+svc_pool_map_init_pernode(struct svc_pool_map *m)
+{
+        unsigned int maxpools = highest_possible_node_id() + 1;
+        unsigned int pidx = 0;
+        unsigned int node;
+        int err;
+
+        err = svc_pool_map_alloc_arrays(m, maxpools);
+        if (err)
+                return err;
+
+        for_each_node_with_cpus(node) {
+                /* some architectures (e.g. SN2) have cpuless nodes */
+                BUG_ON(pidx >= maxpools);
+                m->to_pool[node] = pidx;
+                m->pool_to[pidx] = node;
+                pidx++;
+        }
+        /* nodes brought online later all get mapped to pool0, sorry */
+
+        return pidx;
+}
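[Annotation: both initialisers build the same pair of inverse arrays, and both rely on kcalloc() zero-filling them, which is why cpus or nodes brought online later silently land in pool 0. A userspace sketch of the pernode construction, under a hypothetical two-node topology where node 0 is cpuless:]

/* Userspace sketch of the array construction in svc_pool_map_init_pernode();
 * node_has_cpus[] is a hypothetical stand-in for for_each_node_with_cpus(). */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        int node_has_cpus[2] = { 0, 1 };   /* node 0 cpuless (e.g. SN2) */
        unsigned int maxpools = 2, pidx = 0, node;
        /* calloc zero-fills, like kcalloc: unmapped ids default to pool 0 */
        unsigned int *to_pool = calloc(maxpools, sizeof(unsigned int));
        unsigned int *pool_to = calloc(maxpools, sizeof(unsigned int));

        for (node = 0; node < maxpools; node++) {
                if (!node_has_cpus[node])
                        continue;          /* skip cpuless nodes, as the patch does */
                to_pool[node] = pidx;
                pool_to[pidx] = node;
                pidx++;
        }
        /* prints: npools=1 pool_to[0]=1 to_pool[0]=0 to_pool[1]=0 */
        printf("npools=%u pool_to[0]=%u to_pool[0]=%u to_pool[1]=%u\n",
               pidx, pool_to[0], to_pool[0], to_pool[1]);
        free(to_pool);
        free(pool_to);
        return 0;
}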
+
+
+/*
+ * Build the global map of cpus to pools and vice versa.
+ */
+static unsigned int
+svc_pool_map_init(void)
+{
+        struct svc_pool_map *m = &svc_pool_map;
+        int npools = -1;
+
+        if (m->mode != SVC_POOL_NONE)
+                return m->npools;
+
+        m->mode = svc_pool_map_choose_mode();
+
+        switch (m->mode) {
+        case SVC_POOL_PERCPU:
+                npools = svc_pool_map_init_percpu(m);
+                break;
+        case SVC_POOL_PERNODE:
+                npools = svc_pool_map_init_pernode(m);
+                break;
+        }
+
+        if (npools < 0) {
+                /* default, or memory allocation failure */
+                npools = 1;
+                m->mode = SVC_POOL_GLOBAL;
+        }
+        m->npools = npools;
+
+        return m->npools;
+}
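[Annotation: svc_pool_map_init() is effectively a memoised constructor: the first caller pays for mode detection and array setup, later callers get the cached npools, and any failure collapses to the safe single-pool default. A minimal userspace sketch of that pattern, with all names hypothetical:]

/* Userspace sketch of the memoisation in svc_pool_map_init():
 * initialise once, cache the result, fall back to 1 pool on error. */
#include <stdio.h>

#define MODE_NONE   (-1)
#define MODE_GLOBAL 0

static struct { int mode; unsigned int npools; } map = { .mode = MODE_NONE };

static int expensive_init(void)         /* stands in for percpu/pernode setup */
{
        return -1;                       /* pretend the setup failed */
}

static unsigned int map_init(void)
{
        int npools;

        if (map.mode != MODE_NONE)       /* already initialised: cheap path */
                return map.npools;
        npools = expensive_init();
        if (npools < 0) {                /* default, or allocation failure */
                npools = 1;
                map.mode = MODE_GLOBAL;
        }
        map.npools = npools;
        return map.npools;
}

int main(void)
{
        printf("%u %u\n", map_init(), map_init());  /* "1 1": init ran once */
        return 0;
}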
+
+/*
+ * Set the current thread's cpus_allowed mask so that it
+ * will only run on cpus in the given pool.
+ *
+ * Returns 1 and fills in oldmask iff a cpumask was applied.
+ */
+static inline int
+svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+{
+        struct svc_pool_map *m = &svc_pool_map;
+        unsigned int node;              /* or cpu */
+
+        /*
+         * The caller checks for sv_nrpools > 1, which
+         * implies that we've been initialised and the
+         * map mode is not NONE.
+         */
+        BUG_ON(m->mode == SVC_POOL_NONE);
+
+        switch (m->mode) {
+        default:
+                return 0;
+        case SVC_POOL_PERCPU:
+                node = m->pool_to[pidx];
+                *oldmask = current->cpus_allowed;
+                set_cpus_allowed(current, cpumask_of_cpu(node));
+                return 1;
+        case SVC_POOL_PERNODE:
+                node = m->pool_to[pidx];
+                *oldmask = current->cpus_allowed;
+                set_cpus_allowed(current, node_to_cpumask(node));
+                return 1;
+        }
+}
+
+/*
+ * Use the mapping mode to choose a pool for a given CPU.
+ * Used when enqueueing an incoming RPC.  Always returns
+ * a non-NULL pool pointer.
+ */
+struct svc_pool *
+svc_pool_for_cpu(struct svc_serv *serv, int cpu)
+{
+        struct svc_pool_map *m = &svc_pool_map;
+        unsigned int pidx = 0;
+
+        /*
+         * SVC_POOL_NONE happens in a pure client when
+         * lockd is brought up, so silently treat it the
+         * same as SVC_POOL_GLOBAL.
+         */
+        switch (m->mode) {
+        case SVC_POOL_PERCPU:
+                pidx = m->to_pool[cpu];
+                break;
+        case SVC_POOL_PERNODE:
+                pidx = m->to_pool[cpu_to_node(cpu)];
+                break;
+        }
+        return &serv->sv_pools[pidx % serv->sv_nrpools];
+}
+
+
+/*
  * Create an RPC service
  */
 static struct svc_serv *
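[Annotation: the pidx % serv->sv_nrpools clamp is what makes the "always returns a non-NULL pool pointer" promise hold: the map is global, but a serv may have fewer pools than the map describes, so the pool id is folded into the serv's own range. A sketch of the lookup with a hypothetical PERCPU map and a single-pool serv:]

/* Userspace sketch of the pool lookup in svc_pool_for_cpu(). */
#include <stdio.h>

int main(void)
{
        unsigned int to_pool[4] = { 0, 1, 2, 3 };  /* cpu -> pool, as built above */
        unsigned int sv_nrpools = 1;               /* a single-pool serv */
        int cpu = 3;

        /* the modulo clamps the map's pool id into the serv's own range,
         * so a single-pool serv always gets pool 0 */
        unsigned int pidx = to_pool[cpu] % sv_nrpools;
        printf("cpu %d -> pool %u\n", cpu, pidx);  /* cpu 3 -> pool 0 */
        return 0;
}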
@@ -105,8 +345,9 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 		  svc_thread_fn func, int sig, struct module *mod)
 {
 	struct svc_serv *serv;
+	unsigned int npools = svc_pool_map_init();
 
-	serv = __svc_create(prog, bufsize, /*npools*/1, shutdown);
+	serv = __svc_create(prog, bufsize, npools, shutdown);
 
 	if (serv != NULL) {
 		serv->sv_function = func;
@@ -209,6 +450,8 @@ svc_release_buffer(struct svc_rqst *rqstp)
 
 /*
  * Create a thread in the given pool.  Caller must hold BKL.
+ * On a NUMA or SMP machine, with a multi-pool serv, the thread
+ * will be restricted to run on the cpus belonging to the pool.
  */
 static int
 __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
@@ -216,6 +459,8 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
 {
 	struct svc_rqst *rqstp;
 	int error = -ENOMEM;
+	int have_oldmask = 0;
+	cpumask_t oldmask;
 
 	rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
 	if (!rqstp)
@@ -235,7 +480,15 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
 	spin_unlock_bh(&pool->sp_lock);
 	rqstp->rq_server = serv;
 	rqstp->rq_pool = pool;
+
+	if (serv->sv_nrpools > 1)
+		have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
+
 	error = kernel_thread((int (*)(void *)) func, rqstp, 0);
+
+	if (have_oldmask)
+		set_cpus_allowed(current, oldmask);
+
 	if (error < 0)
 		goto out_thread;
 	svc_sock_update_bufs(serv);
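[Annotation: the save/set/spawn/restore sequence above works because a child created by kernel_thread() inherits the parent's cpus_allowed at creation time: the parent pins itself to the pool's cpus just long enough for the new thread to inherit that mask, then restores its own. The userspace analogue with pthreads, as a sketch; on Linux a new thread likewise inherits the creating thread's affinity:]

/* Userspace analogue of the cpumask save/set/spawn/restore in
 * __svc_create_thread(), using Linux CPU affinity calls. */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void *worker(void *arg)
{
        (void)arg;
        printf("worker running on cpu %d\n", sched_getcpu());
        return NULL;
}

int main(void)
{
        cpu_set_t oldmask, poolmask;
        pthread_t t;

        /* save the current mask, as the patch saves current->cpus_allowed */
        sched_getaffinity(0, sizeof(oldmask), &oldmask);

        CPU_ZERO(&poolmask);
        CPU_SET(0, &poolmask);          /* "pool" = cpu 0 for this demo */
        sched_setaffinity(0, sizeof(poolmask), &poolmask);

        pthread_create(&t, NULL, worker, NULL);  /* child inherits poolmask */

        /* restore our own mask; the worker stays pinned */
        sched_setaffinity(0, sizeof(oldmask), &oldmask);

        pthread_join(t, NULL);
        return 0;
}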
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b78659adeff3..cba85d195222 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -151,8 +151,9 @@ static void
 svc_sock_enqueue(struct svc_sock *svsk)
 {
 	struct svc_serv *serv = svsk->sk_server;
-	struct svc_pool *pool = &serv->sv_pools[0];
+	struct svc_pool *pool;
 	struct svc_rqst *rqstp;
+	int cpu;
 
 	if (!(svsk->sk_flags &
 	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
@@ -160,6 +161,10 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	if (test_bit(SK_DEAD, &svsk->sk_flags))
 		return;
 
+	cpu = get_cpu();
+	pool = svc_pool_for_cpu(svsk->sk_server, cpu);
+	put_cpu();
+
 	spin_lock_bh(&pool->sp_lock);
 
 	if (!list_empty(&pool->sp_threads) &&
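[Annotation: get_cpu() disables preemption so the returned cpu id cannot go stale while it is used; since the cpu number here is only a placement hint, the patch drops the reference with put_cpu() straight away, and a migration after that point merely costs locality, not correctness. A userspace analogue of the same hint pattern, with a hypothetical bucket table:]

/* Userspace analogue of the "current cpu as a placement hint"
 * pattern in svc_sock_enqueue(). */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

#define NBUCKETS 4

int main(void)
{
        int cpu = sched_getcpu();       /* may be stale by the next line... */
        int bucket = cpu % NBUCKETS;    /* ...but any bucket is still correct */

        printf("cpu %d -> bucket %d\n", cpu, bucket);
        return 0;
}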