summary | refs | log | tree | commit | diff | stats
path: root/drivers/misc
diff options
context:
space:
mode:
author: Frederic Barrat <fbarrat@linux.vnet.ibm.com> 2017-09-03 14:15:13 -0400
committer: Michael Ellerman <mpe@ellerman.id.au> 2017-09-28 03:09:16 -0400
commit: 03b8abedf4f4965e7e9e0d4f92877c42c07ce19f (patch)
tree: 477b0118aedfca952b43149def528bcefbf8a8c8 /drivers/misc
parent: 6110236b9bbd177debc045c5fc29224444686ece (diff)
cxl: Enable global TLBIs for cxl contexts
The PSL and nMMU need to see all TLB invalidations for the memory contexts used on the adapter. For the hash memory model, it is done by making all TLBIs global as soon as the cxl driver is in use. For radix, we need something similar, but we can refine and only convert to global the invalidations for contexts actually used by the device.

The new mm_context_add_copro() API increments the 'active_cpus' count for the contexts attached to the cxl adapter. As soon as there's more than 1 active cpu, the TLBIs for the context become global. Active cpu count must be decremented when detaching to restore locality if possible and to avoid overflowing the counter.

The hash memory model support is somewhat limited, as we can't decrement the active cpus count when mm_context_remove_copro() is called, because we can't flush the TLB for a mm on hash. So TLBIs remain global on hash.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Fixes: f24be42aab37 ("cxl: Add psl9 specific code")
Tested-by: Alistair Popple <alistair@popple.id.au>
[mpe: Fold in updated comment on the barrier from Fred]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'drivers/misc')
-rw-r--r-- drivers/misc/cxl/api.c | 16
-rw-r--r-- drivers/misc/cxl/context.c | 3
-rw-r--r-- drivers/misc/cxl/file.c | 24
3 files changed, 38 insertions, 5 deletions
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index a0c44d16bf30..7c11bad5cded 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/sched/mm.h> 17#include <linux/sched/mm.h>
18#include <linux/mmu_context.h>
18 19
19#include "cxl.h" 20#include "cxl.h"
20 21
@@ -331,9 +332,12 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
331 /* ensure this mm_struct can't be freed */ 332 /* ensure this mm_struct can't be freed */
332 cxl_context_mm_count_get(ctx); 333 cxl_context_mm_count_get(ctx);
333 334
334 /* decrement the use count */ 335 if (ctx->mm) {
335 if (ctx->mm) 336 /* decrement the use count from above */
336 mmput(ctx->mm); 337 mmput(ctx->mm);
338 /* make TLBIs for this context global */
339 mm_context_add_copro(ctx->mm);
340 }
337 } 341 }
338 342
339 /* 343 /*
@@ -342,13 +346,19 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
342 */ 346 */
343 cxl_ctx_get(); 347 cxl_ctx_get();
344 348
349 /* See the comment in afu_ioctl_start_work() */
350 smp_mb();
351
345 if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { 352 if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
346 put_pid(ctx->pid); 353 put_pid(ctx->pid);
347 ctx->pid = NULL; 354 ctx->pid = NULL;
348 cxl_adapter_context_put(ctx->afu->adapter); 355 cxl_adapter_context_put(ctx->afu->adapter);
349 cxl_ctx_put(); 356 cxl_ctx_put();
350 if (task) 357 if (task) {
351 cxl_context_mm_count_put(ctx); 358 cxl_context_mm_count_put(ctx);
359 if (ctx->mm)
360 mm_context_remove_copro(ctx->mm);
361 }
352 goto out; 362 goto out;
353 } 363 }
354 364
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 8c32040b9c09..12a41b2753f0 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/idr.h> 19#include <linux/idr.h>
20#include <linux/sched/mm.h> 20#include <linux/sched/mm.h>
21#include <linux/mmu_context.h>
21#include <asm/cputable.h> 22#include <asm/cputable.h>
22#include <asm/current.h> 23#include <asm/current.h>
23#include <asm/copro.h> 24#include <asm/copro.h>
@@ -267,6 +268,8 @@ int __detach_context(struct cxl_context *ctx)
267 268
268 /* Decrease the mm count on the context */ 269 /* Decrease the mm count on the context */
269 cxl_context_mm_count_put(ctx); 270 cxl_context_mm_count_put(ctx);
271 if (ctx->mm)
272 mm_context_remove_copro(ctx->mm);
270 ctx->mm = NULL; 273 ctx->mm = NULL;
271 274
272 return 0; 275 return 0;
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 4bfad9f6dc9f..76c0b0ca9388 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -19,6 +19,7 @@
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/sched/mm.h> 21#include <linux/sched/mm.h>
22#include <linux/mmu_context.h>
22#include <asm/cputable.h> 23#include <asm/cputable.h>
23#include <asm/current.h> 24#include <asm/current.h>
24#include <asm/copro.h> 25#include <asm/copro.h>
@@ -220,9 +221,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
220 /* ensure this mm_struct can't be freed */ 221 /* ensure this mm_struct can't be freed */
221 cxl_context_mm_count_get(ctx); 222 cxl_context_mm_count_get(ctx);
222 223
223 /* decrement the use count */ 224 if (ctx->mm) {
224 if (ctx->mm) 225 /* decrement the use count from above */
225 mmput(ctx->mm); 226 mmput(ctx->mm);
227 /* make TLBIs for this context global */
228 mm_context_add_copro(ctx->mm);
229 }
226 230
227 /* 231 /*
228 * Increment driver use count. Enables global TLBIs for hash 232 * Increment driver use count. Enables global TLBIs for hash
@@ -230,6 +234,20 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
230 */ 234 */
231 cxl_ctx_get(); 235 cxl_ctx_get();
232 236
237 /*
238 * A barrier is needed to make sure all TLBIs are global
239 * before we attach and the context starts being used by the
240 * adapter.
241 *
242 * Needed after mm_context_add_copro() for radix and
243 * cxl_ctx_get() for hash/p8.
244 *
245 * The barrier should really be mb(), since it involves a
246 * device. However, it's only useful when we have local
247 * vs. global TLBIs, i.e SMP=y. So keep smp_mb().
248 */
249 smp_mb();
250
233 trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); 251 trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
234 252
235 if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, 253 if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
@@ -240,6 +258,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
240 ctx->pid = NULL; 258 ctx->pid = NULL;
241 cxl_ctx_put(); 259 cxl_ctx_put();
242 cxl_context_mm_count_put(ctx); 260 cxl_context_mm_count_put(ctx);
261 if (ctx->mm)
262 mm_context_remove_copro(ctx->mm);
243 goto out; 263 goto out;
244 } 264 }
245 265