-rw-r--r--  include/linux/sunrpc/svc.h  |   1
-rw-r--r--  net/sunrpc/svc.c            | 255
-rw-r--r--  net/sunrpc/svcsock.c        |   7
3 files changed, 261 insertions, 2 deletions
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f2eeb833e7d8..4ebcdf91f3b3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -386,5 +386,6 @@ int svc_process(struct svc_rqst *);
 int svc_register(struct svc_serv *, int, unsigned short);
 void svc_wake_up(struct svc_serv *);
 void svc_reserve(struct svc_rqst *rqstp, int space);
+struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 
 #endif /* SUNRPC_SVC_H */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 8c75eec4fd6a..a99e67b164c1 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -4,6 +4,10 @@
  * High-level RPC service routines
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ *
+ * Multiple thread pools and NUMAisation
+ * Copyright (c) 2006 Silicon Graphics, Inc.
+ * by Greg Banks <gnb@melbourne.sgi.com>
  */
 
 #include <linux/linkage.h>
@@ -25,6 +29,242 @@
 #define RPC_PARANOIA 1
 
 /*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+        SVC_POOL_NONE = -1,     /* uninitialised, choose one of the others */
+        SVC_POOL_GLOBAL,        /* no mapping, just a single global pool
+                                 * (legacy & UP mode) */
+        SVC_POOL_PERCPU,        /* one pool per cpu */
+        SVC_POOL_PERNODE        /* one pool per numa node */
+};
+
+/*
+ * Structure for mapping cpus to pools and vice versa.
+ * Set up once during sunrpc initialisation.
+ */
+static struct svc_pool_map {
+        int mode;                       /* Note: int not enum to avoid
+                                         * warnings about "enumeration value
+                                         * not handled in switch" */
+        unsigned int npools;
+        unsigned int *pool_to;          /* maps pool id to cpu or node */
+        unsigned int *to_pool;          /* maps cpu or node to pool id */
+} svc_pool_map = {
+        .mode = SVC_POOL_NONE
+};
+
+
+/*
+ * Detect best pool mapping mode heuristically,
+ * according to the machine's topology.
+ */
+static int
+svc_pool_map_choose_mode(void)
+{
+        unsigned int node;
+
+        if (num_online_nodes() > 1) {
+                /*
+                 * Actually have multiple NUMA nodes,
+                 * so split pools on NUMA node boundaries
+                 */
+                return SVC_POOL_PERNODE;
+        }
+
+        node = any_online_node(node_online_map);
+        if (nr_cpus_node(node) > 2) {
+                /*
+                 * Non-trivial SMP, or CONFIG_NUMA on
+                 * non-NUMA hardware, e.g. with a generic
+                 * x86_64 kernel on Xeons.  In this case we
+                 * want to divide the pools on cpu boundaries.
+                 */
+                return SVC_POOL_PERCPU;
+        }
+
+        /* default: one global pool */
+        return SVC_POOL_GLOBAL;
+}
+
+/*
+ * Allocate the to_pool[] and pool_to[] arrays.
+ * Returns 0 on success or an errno.
+ */
+static int
+svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
+{
+        m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
+        if (!m->to_pool)
+                goto fail;
+        m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
+        if (!m->pool_to)
+                goto fail_free;
+
+        return 0;
+
+fail_free:
+        kfree(m->to_pool);
+fail:
+        return -ENOMEM;
+}
+
+/*
+ * Initialise the pool map for SVC_POOL_PERCPU mode.
+ * Returns number of pools or <0 on error.
+ */
+static int
+svc_pool_map_init_percpu(struct svc_pool_map *m)
+{
+        unsigned int maxpools = highest_possible_processor_id()+1;
+        unsigned int pidx = 0;
+        unsigned int cpu;
+        int err;
+
+        err = svc_pool_map_alloc_arrays(m, maxpools);
+        if (err)
+                return err;
+
+        for_each_online_cpu(cpu) {
+                BUG_ON(pidx > maxpools);
+                m->to_pool[cpu] = pidx;
+                m->pool_to[pidx] = cpu;
+                pidx++;
+        }
+        /* cpus brought online later all get mapped to pool0, sorry */
+
+        return pidx;
+}
+
+
+/*
+ * Initialise the pool map for SVC_POOL_PERNODE mode.
+ * Returns number of pools or <0 on error.
+ */
+static int
+svc_pool_map_init_pernode(struct svc_pool_map *m)
+{
+        unsigned int maxpools = highest_possible_node_id()+1;
+        unsigned int pidx = 0;
+        unsigned int node;
+        int err;
+
+        err = svc_pool_map_alloc_arrays(m, maxpools);
+        if (err)
+                return err;
+
+        for_each_node_with_cpus(node) {
+                /* some architectures (e.g. SN2) have cpuless nodes */
+                BUG_ON(pidx > maxpools);
+                m->to_pool[node] = pidx;
+                m->pool_to[pidx] = node;
+                pidx++;
+        }
+        /* nodes brought online later all get mapped to pool0, sorry */
+
+        return pidx;
+}
+
+
+/*
+ * Build the global map of cpus to pools and vice versa.
+ */
+static unsigned int
+svc_pool_map_init(void)
+{
+        struct svc_pool_map *m = &svc_pool_map;
+        int npools = -1;
+
+        if (m->mode != SVC_POOL_NONE)
+                return m->npools;
+
+        m->mode = svc_pool_map_choose_mode();
+
+        switch (m->mode) {
+        case SVC_POOL_PERCPU:
+                npools = svc_pool_map_init_percpu(m);
+                break;
+        case SVC_POOL_PERNODE:
+                npools = svc_pool_map_init_pernode(m);
+                break;
+        }
+
+        if (npools < 0) {
+                /* default, or memory allocation failure */
+                npools = 1;
+                m->mode = SVC_POOL_GLOBAL;
+        }
+        m->npools = npools;
+
+        return m->npools;
+}
+
+/*
+ * Set the current thread's cpus_allowed mask so that it
+ * will only run on cpus in the given pool.
+ *
+ * Returns 1 and fills in oldmask iff a cpumask was applied.
+ */
+static inline int
+svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+{
+        struct svc_pool_map *m = &svc_pool_map;
+        unsigned int node;      /* or cpu */
+
+        /*
+         * The caller checks for sv_nrpools > 1, which
+         * implies that we've been initialized and the
+         * map mode is not NONE.
+         */
+        BUG_ON(m->mode == SVC_POOL_NONE);
+
+        switch (m->mode)
+        {
+        default:
+                return 0;
+        case SVC_POOL_PERCPU:
+                node = m->pool_to[pidx];
+                *oldmask = current->cpus_allowed;
+                set_cpus_allowed(current, cpumask_of_cpu(node));
+                return 1;
+        case SVC_POOL_PERNODE:
+                node = m->pool_to[pidx];
+                *oldmask = current->cpus_allowed;
+                set_cpus_allowed(current, node_to_cpumask(node));
+                return 1;
+        }
+}
+
+/*
+ * Use the mapping mode to choose a pool for a given CPU.
+ * Used when enqueueing an incoming RPC.  Always returns
+ * a non-NULL pool pointer.
+ */
+struct svc_pool *
+svc_pool_for_cpu(struct svc_serv *serv, int cpu)
+{
+        struct svc_pool_map *m = &svc_pool_map;
+        unsigned int pidx = 0;
+
+        /*
+         * SVC_POOL_NONE happens in a pure client when
+         * lockd is brought up, so silently treat it the
+         * same as SVC_POOL_GLOBAL.
+         */
+
+        switch (m->mode) {
+        case SVC_POOL_PERCPU:
+                pidx = m->to_pool[cpu];
+                break;
+        case SVC_POOL_PERNODE:
+                pidx = m->to_pool[cpu_to_node(cpu)];
+                break;
+        }
+        return &serv->sv_pools[pidx % serv->sv_nrpools];
+}
+
+
+/*
  * Create an RPC service
  */
 static struct svc_serv *
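To make the new to_pool[]/pool_to[] tables concrete, here is a standalone userspace sketch (illustration only, not part of the patch) that assumes a hypothetical two-node machine with cpus 0-1 on node 0 and cpus 2-3 on node 1, builds the same maps svc_pool_map_init_pernode() would build, and then performs the lookup svc_pool_for_cpu() does:

        #include <stdio.h>

        /* assumed topology: cpu -> node, standing in for cpu_to_node() */
        static const unsigned int cpu_node[4] = { 0, 0, 1, 1 };

        int main(void)
        {
                unsigned int to_pool[2], pool_to[2];    /* node <-> pool maps */
                unsigned int pidx = 0, node, cpu;

                for (node = 0; node < 2; node++) {      /* for_each_node_with_cpus() */
                        to_pool[node] = pidx;
                        pool_to[pidx] = node;
                        pidx++;
                }
                for (pidx = 0; pidx < 2; pidx++)        /* reverse map, used for cpumasks */
                        printf("pool %u serves node %u\n", pidx, pool_to[pidx]);
                for (cpu = 0; cpu < 4; cpu++)           /* svc_pool_for_cpu()'s lookup */
                        printf("cpu %u -> pool %u\n", cpu, to_pool[cpu_node[cpu]]);
                return 0;
        }

It prints cpus 0-1 mapping to pool 0 and cpus 2-3 to pool 1: incoming traffic is queued to a pool whose threads (see __svc_create_thread() below) are pinned to the same node.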
@@ -105,8 +345,9 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
                 svc_thread_fn func, int sig, struct module *mod)
 {
         struct svc_serv *serv;
+        unsigned int npools = svc_pool_map_init();
 
-        serv = __svc_create(prog, bufsize, /*npools*/1, shutdown);
+        serv = __svc_create(prog, bufsize, npools, shutdown);
 
         if (serv != NULL) {
                 serv->sv_function = func;
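For context, only services that call svc_create_pooled() get multiple pools; plain svc_create() users keep a single pool. Roughly how nfsd invokes it in this kernel generation (argument list recalled from the contemporaneous fs/nfsd/nfssvc.c, so treat it as an assumption rather than part of this patch):

        nfsd_serv = svc_create_pooled(&nfsd_program, NFSD_BUFSIZE,
                                      nfsd_last_thread, nfsd, SIGINT,
                                      THIS_MODULE);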
@@ -209,6 +450,8 @@ svc_release_buffer(struct svc_rqst *rqstp)
 
 /*
  * Create a thread in the given pool.  Caller must hold BKL.
+ * On a NUMA or SMP machine, with a multi-pool serv, the thread
+ * will be restricted to run on the cpus belonging to the pool.
  */
 static int
 __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
@@ -216,6 +459,8 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
 {
         struct svc_rqst *rqstp;
         int error = -ENOMEM;
+        int have_oldmask = 0;
+        cpumask_t oldmask;
 
         rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
         if (!rqstp)
@@ -235,7 +480,15 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
         spin_unlock_bh(&pool->sp_lock);
         rqstp->rq_server = serv;
         rqstp->rq_pool = pool;
+
+        if (serv->sv_nrpools > 1)
+                have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
+
         error = kernel_thread((int (*)(void *)) func, rqstp, 0);
+
+        if (have_oldmask)
+                set_cpus_allowed(current, oldmask);
+
         if (error < 0)
                 goto out_thread;
         svc_sock_update_bufs(serv);
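The save/restore dance around kernel_thread() works because fork copies the parent's cpus_allowed into the child: narrowing the parent's mask only for the duration of the fork is what pins the new pool thread, after which the parent restores its own mask. The pattern in isolation (a sketch against the 2.6-era API; fn, arg, node and pid are placeholders, not names from this patch):

        cpumask_t oldmask = current->cpus_allowed;        /* remember parent's mask */

        set_cpus_allowed(current, node_to_cpumask(node)); /* child will inherit this */
        pid = kernel_thread(fn, arg, 0);                  /* fork copies cpus_allowed */
        set_cpus_allowed(current, oldmask);               /* restore the parent */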
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b78659adeff3..cba85d195222 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -151,8 +151,9 @@ static void
 svc_sock_enqueue(struct svc_sock *svsk)
 {
         struct svc_serv *serv = svsk->sk_server;
-        struct svc_pool *pool = &serv->sv_pools[0];
+        struct svc_pool *pool;
         struct svc_rqst *rqstp;
+        int cpu;
 
         if (!(svsk->sk_flags &
               ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
@@ -160,6 +161,10 @@ svc_sock_enqueue(struct svc_sock *svsk)
         if (test_bit(SK_DEAD, &svsk->sk_flags))
                 return;
 
+        cpu = get_cpu();
+        pool = svc_pool_for_cpu(svsk->sk_server, cpu);
+        put_cpu();
+
         spin_lock_bh(&pool->sp_lock);
 
         if (!list_empty(&pool->sp_threads) &&
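Two defensive details in this last hunk are easy to miss. The get_cpu()/put_cpu() pair brackets the lookup because the current cpu id is only stable while preemption is disabled; the pool pointer itself remains usable after put_cpu(), since pools live as long as the serv. And svc_pool_for_cpu() returns &serv->sv_pools[pidx % serv->sv_nrpools], so a serv with fewer pools than the global map describes (e.g. a single-pool serv from plain svc_create()) still gets a valid pool. A toy illustration of that modulo guard, with assumed values:

        unsigned int pidx = 3;                  /* map: this cpu belongs to pool 3 */
        unsigned int sv_nrpools = 1;            /* but this serv has a single pool */
        unsigned int idx = pidx % sv_nrpools;   /* 3 % 1 == 0: falls back to pool 0 */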