cxl: Enable global TLBIs for cxl contexts

The PSL and nMMU need to see all TLB invalidations for the memory contexts used on the adapter. For the hash memory model, it is done by making all TLBIs global as soon as the cxl driver is in use. For radix, we need something similar, but we can refine and only convert to global the invalidations for contexts actually used by the device.

The new mm_context_add_copro() API increments the 'active_cpus' count for the contexts attached to the cxl adapter. As soon as there's more than 1 active cpu, the TLBIs for the context become global. Active cpu count must be decremented when detaching to restore locality if possible and to avoid overflowing the counter.

The hash memory model support is somewhat limited, as we can't decrement the active cpus count when mm_context_remove_copro() is called, because we can't flush the TLB for a mm on hash. So TLBIs remain global on hash.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Fixes: f24be42aab37 ("cxl: Add psl9 specific code")
Tested-by: Alistair Popple <alistair@popple.id.au>
[mpe: Fold in updated comment on the barrier from Fred]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
author: Frederic Barrat <fbarrat@linux.vnet.ibm.com> 2017-09-03 14:15:13 -0400
committer: Michael Ellerman <mpe@ellerman.id.au> 2017-09-28 03:09:16 -0400
commit: 03b8abedf4f4965e7e9e0d4f92877c42c07ce19f (patch)
tree: 477b0118aedfca952b43149def528bcefbf8a8c8 /drivers/misc
parent: 6110236b9bbd177debc045c5fc29224444686ece (diff)
3 files changed, 38 insertions, 5 deletions
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index a0c44d16bf30..7c11bad5cded 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
 #include "cxl.h"
@@ -331,9 +332,12 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
                /* ensure this mm_struct can't be freed */
                cxl_context_mm_count_get(ctx);
-                /* decrement the use count */
+                if (ctx->mm) {
-                if (ctx->mm)
+                        /* decrement the use count from above */
                        mmput(ctx->mm);
+                        /* make TLBIs for this context global */
+                        mm_context_add_copro(ctx->mm);
+                }
        }
        /*
@@ -342,13 +346,19 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
         */
        cxl_ctx_get();
+        /* See the comment in afu_ioctl_start_work() */
+        smp_mb();
        if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
                put_pid(ctx->pid);
                ctx->pid = NULL;
                cxl_adapter_context_put(ctx->afu->adapter);
                cxl_ctx_put();
-                if (task)
+                if (task) {
                        cxl_context_mm_count_put(ctx);
+                        if (ctx->mm)
+                                mm_context_remove_copro(ctx->mm);
+                }
                goto out;
        }
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 8c32040b9c09..12a41b2753f0 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
 #include <asm/cputable.h>
 #include <asm/current.h>
 #include <asm/copro.h>
@@ -267,6 +268,8 @@ int __detach_context(struct cxl_context *ctx)
        /* Decrease the mm count on the context */
        cxl_context_mm_count_put(ctx);
+        if (ctx->mm)
+                mm_context_remove_copro(ctx->mm);
        ctx->mm = NULL;
        return 0;
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 4bfad9f6dc9f..76c0b0ca9388 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
 #include <asm/cputable.h>
 #include <asm/current.h>
 #include <asm/copro.h>
@@ -220,9 +221,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
        /* ensure this mm_struct can't be freed */
        cxl_context_mm_count_get(ctx);
-        /* decrement the use count */
+        if (ctx->mm) {
-        if (ctx->mm)
+                /* decrement the use count from above */
                mmput(ctx->mm);
+                /* make TLBIs for this context global */
+                mm_context_add_copro(ctx->mm);
+        }
        /*
         * Increment driver use count. Enables global TLBIs for hash
@@ -230,6 +234,20 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
         */
        cxl_ctx_get();
+        /*
+         * A barrier is needed to make sure all TLBIs are global
+         * before we attach and the context starts being used by the
+         * adapter.
+         *
+         * Needed after mm_context_add_copro() for radix and
+         * cxl_ctx_get() for hash/p8.
+         *
+         * The barrier should really be mb(), since it involves a
+         * device. However, it's only useful when we have local
+         * vs. global TLBIs, i.e SMP=y. So keep smp_mb().
+         */
+        smp_mb();
        trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
        if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
@@ -240,6 +258,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
                ctx->pid = NULL;
                cxl_ctx_put();
                cxl_context_mm_count_put(ctx);
+                if (ctx->mm)
+                        mm_context_remove_copro(ctx->mm);
                goto out;
        }
author	Frederic Barrat <fbarrat@linux.vnet.ibm.com>	2017-09-03 14:15:13 -0400
committer	Michael Ellerman <mpe@ellerman.id.au>	2017-09-28 03:09:16 -0400
commit	03b8abedf4f4965e7e9e0d4f92877c42c07ce19f (patch)
tree	477b0118aedfca952b43149def528bcefbf8a8c8 /drivers/misc
parent	6110236b9bbd177debc045c5fc29224444686ece (diff)