path: root/drivers/misc/cxl/native.c
author     Ian Munsie <imunsie@au1.ibm.com>       2014-10-08 04:55:02 -0400
committer  Michael Ellerman <mpe@ellerman.id.au>  2014-10-08 05:15:57 -0400
commit     f204e0b8cedd7da1dfcfd05ed6b7692737e24029 (patch)
tree       35ca15049345cdd5dbed38229a6b3add05610658 /drivers/misc/cxl/native.c
parent     10542ca0156f60571ef41799d44d40dd4cb0a473 (diff)
cxl: Driver code for powernv PCIe based cards for userspace access
This is the core of the cxl driver. It adds support for using cxl cards in the powernv environment only (i.e. POWER8 bare metal). It allows access to cxl accelerators by userspace using the /dev/cxl/afuM.N char devices.

The kernel driver has no knowledge of the function implemented by the accelerator. It provides services to userspace via the /dev/cxl/afuM.N devices. When a program opens this device and runs the start work IOCTL, the accelerator will have coherent access to that process's memory using the same virtual addresses. That process may mmap the device to access any MMIO space the accelerator provides. Also, reads on the device will allow interrupts to be received. These services are further documented in a later patch in Documentation/powerpc/cxl.txt.

Documentation of the cxl hardware architecture and userspace API is provided in subsequent patches.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
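For orientation, the sketch below shows roughly how a userspace program would drive one of these devices. It is not part of this patch: the CXL_IOCTL_START_WORK ioctl, struct cxl_ioctl_start_work, struct cxl_event and the <misc/cxl.h> uapi header are only added by later patches in this series, and the device name afu0.0 is just an example of the afuM.N naming, so treat all of those names as assumptions here.

/*
 * Illustrative sketch only: attach to an AFU, map its MMIO space and wait
 * for an AFU interrupt. The ioctl, structures and uapi header used here
 * come from later patches in this series and are assumptions at this point.
 */
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <misc/cxl.h>	/* uapi header added by a later patch */

int run_afu(uint64_t wed, size_t mmio_size)
{
        struct cxl_ioctl_start_work work;
        struct cxl_event event;
        void *mmio;
        int fd;

        fd = open("/dev/cxl/afu0.0", O_RDWR);	/* example /dev/cxl/afuM.N node */
        if (fd < 0)
                return -1;

        /* Start a context: the AFU gets coherent access to our address space */
        memset(&work, 0, sizeof(work));
        work.work_element_descriptor = wed;	/* handed to the AFU as its WED */
        if (ioctl(fd, CXL_IOCTL_START_WORK, &work))
                goto out_close;

        /* Map whatever MMIO space the AFU provides for this context */
        mmio = mmap(NULL, mmio_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (mmio == MAP_FAILED)
                goto out_close;

        /* A blocking read delivers AFU interrupts and other events */
        if (read(fd, &event, sizeof(event)) < 0)
                goto out_unmap;

        munmap(mmio, mmio_size);
        close(fd);
        return 0;

out_unmap:
        munmap(mmio, mmio_size);
out_close:
        close(fd);
        return -1;
}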
Diffstat (limited to 'drivers/misc/cxl/native.c')
-rw-r--r--  drivers/misc/cxl/native.c  683
1 file changed, 683 insertions, 0 deletions
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
new file mode 100644
index 000000000000..623286a77114
--- /dev/null
+++ b/drivers/misc/cxl/native.c
@@ -0,0 +1,683 @@
/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <asm/synch.h>
#include <misc/cxl.h>

#include "cxl.h"

static int afu_control(struct cxl_afu *afu, u64 command,
                       u64 result, u64 mask, bool enabled)
{
        u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
        unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);

        spin_lock(&afu->afu_cntl_lock);
        pr_devel("AFU command starting: %llx\n", command);

        cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl | command);

        AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
        while ((AFU_Cntl & mask) != result) {
                if (time_after_eq(jiffies, timeout)) {
                        dev_warn(&afu->dev, "WARNING: AFU control timed out!\n");
                        spin_unlock(&afu->afu_cntl_lock);
                        return -EBUSY;
                }
                pr_devel_ratelimited("AFU control... (0x%.16llx)\n",
                                     AFU_Cntl | command);
                cpu_relax();
                AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
        }
        pr_devel("AFU command complete: %llx\n", command);
        afu->enabled = enabled;
        spin_unlock(&afu->afu_cntl_lock);

        return 0;
}

static int afu_enable(struct cxl_afu *afu)
{
        pr_devel("AFU enable request\n");

        return afu_control(afu, CXL_AFU_Cntl_An_E,
                           CXL_AFU_Cntl_An_ES_Enabled,
                           CXL_AFU_Cntl_An_ES_MASK, true);
}

int cxl_afu_disable(struct cxl_afu *afu)
{
        pr_devel("AFU disable request\n");

        return afu_control(afu, 0, CXL_AFU_Cntl_An_ES_Disabled,
                           CXL_AFU_Cntl_An_ES_MASK, false);
}

/* This will disable as well as reset */
int cxl_afu_reset(struct cxl_afu *afu)
{
        pr_devel("AFU reset request\n");

        return afu_control(afu, CXL_AFU_Cntl_An_RA,
                           CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled,
                           CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
                           false);
}

static int afu_check_and_enable(struct cxl_afu *afu)
{
        if (afu->enabled)
                return 0;
        return afu_enable(afu);
}

int cxl_psl_purge(struct cxl_afu *afu)
{
        u64 PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
        u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
        u64 dsisr, dar;
        u64 start, end;
        unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);

        pr_devel("PSL purge request\n");

        if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
                WARN(1, "psl_purge request while AFU not disabled!\n");
                cxl_afu_disable(afu);
        }

        cxl_p1n_write(afu, CXL_PSL_SCNTL_An,
                      PSL_CNTL | CXL_PSL_SCNTL_An_Pc);
        start = local_clock();
        PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
        while ((PSL_CNTL & CXL_PSL_SCNTL_An_Ps_MASK)
               == CXL_PSL_SCNTL_An_Ps_Pending) {
                if (time_after_eq(jiffies, timeout)) {
                        dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n");
                        return -EBUSY;
                }
                dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
                pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%.16llx PSL_DSISR: 0x%.16llx\n", PSL_CNTL, dsisr);
                if (dsisr & CXL_PSL_DSISR_TRANS) {
                        dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
                        dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%.16llx, DAR: 0x%.16llx\n", dsisr, dar);
                        cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
                } else if (dsisr) {
                        dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%.16llx\n", dsisr);
                        cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
                } else {
                        cpu_relax();
                }
                PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
        }
        end = local_clock();
        pr_devel("PSL purged in %lld ns\n", end - start);

        cxl_p1n_write(afu, CXL_PSL_SCNTL_An,
                      PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc);
        return 0;
}

static int spa_max_procs(int spa_size)
{
        /*
         * From the CAIA:
         * end_of_SPA_area = SPA_Base + ((n+4) * 128) + (( ((n*8) + 127) >> 7) * 128) + 255
         * Most of that junk is really just an overly-complicated way of saying
         * the last 256 bytes are __aligned(128), so it's really:
         * end_of_SPA_area = end_of_PSL_queue_area + __aligned(128) 255
         * and
         * end_of_PSL_queue_area = SPA_Base + ((n+4) * 128) + (n*8) - 1
         * so
         * sizeof(SPA) = ((n+4) * 128) + (n*8) + __aligned(128) 256
         * Ignore the alignment (which is safe in this case as long as we are
         * careful with our rounding) and solve for n:
         */
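        /*
         * sizeof(SPA) = ((n + 4) * 128) + (n * 8) + 256 = 136n + 768
         *   => n = (spa_size - 768) / 136 = ((spa_size / 8) - 96) / 17
         */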
        return ((spa_size / 8) - 96) / 17;
}

static int alloc_spa(struct cxl_afu *afu)
{
        u64 spap;

        /* Work out how many pages to allocate */
        afu->spa_order = 0;
        do {
                afu->spa_order++;
                afu->spa_size = (1 << afu->spa_order) * PAGE_SIZE;
                afu->spa_max_procs = spa_max_procs(afu->spa_size);
        } while (afu->spa_max_procs < afu->num_procs);

        WARN_ON(afu->spa_size > 0x100000); /* Max size supported by the hardware */

        if (!(afu->spa = (struct cxl_process_element *)
              __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->spa_order))) {
                pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n");
                return -ENOMEM;
        }
        pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n",
                 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs);

        afu->sw_command_status = (__be64 *)((char *)afu->spa +
                                            ((afu->spa_max_procs + 3) * 128));

        spap = virt_to_phys(afu->spa) & CXL_PSL_SPAP_Addr;
        spap |= ((afu->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size;
        spap |= CXL_PSL_SPAP_V;
        pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap);
        cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap);

        return 0;
}

static void release_spa(struct cxl_afu *afu)
{
        free_pages((unsigned long) afu->spa, afu->spa_order);
}

int cxl_tlb_slb_invalidate(struct cxl *adapter)
{
        unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);

        pr_devel("CXL adapter wide TLBIA & SLBIA\n");

        cxl_p1_write(adapter, CXL_PSL_AFUSEL, CXL_PSL_AFUSEL_A);

        cxl_p1_write(adapter, CXL_PSL_TLBIA, CXL_TLB_SLB_IQ_ALL);
        while (cxl_p1_read(adapter, CXL_PSL_TLBIA) & CXL_TLB_SLB_P) {
                if (time_after_eq(jiffies, timeout)) {
                        dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n");
                        return -EBUSY;
                }
                cpu_relax();
        }

        cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_ALL);
        while (cxl_p1_read(adapter, CXL_PSL_SLBIA) & CXL_TLB_SLB_P) {
                if (time_after_eq(jiffies, timeout)) {
                        dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n");
                        return -EBUSY;
                }
                cpu_relax();
        }
        return 0;
}

int cxl_afu_slbia(struct cxl_afu *afu)
{
        unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);

        pr_devel("cxl_afu_slbia issuing SLBIA command\n");
        cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL);
        while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) {
                if (time_after_eq(jiffies, timeout)) {
                        dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n");
                        return -EBUSY;
                }
                cpu_relax();
        }
        return 0;
}

static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1)
{
        int rc;

        /* 1. Disable SSTP by writing 0 to SSTP1[V] */
        cxl_p2n_write(afu, CXL_SSTP1_An, 0);

        /* 2. Invalidate all SLB entries */
        if ((rc = cxl_afu_slbia(afu)))
                return rc;

        /* 3. Set SSTP0_An */
        cxl_p2n_write(afu, CXL_SSTP0_An, sstp0);

        /* 4. Set SSTP1_An */
        cxl_p2n_write(afu, CXL_SSTP1_An, sstp1);

        return 0;
}

/* Using per slice version may improve performance here. (ie. SLBIA_An) */
static void slb_invalid(struct cxl_context *ctx)
{
        struct cxl *adapter = ctx->afu->adapter;
        u64 slbia;

        WARN_ON(!mutex_is_locked(&ctx->afu->spa_mutex));

        cxl_p1_write(adapter, CXL_PSL_LBISEL,
                     ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) |
                     be32_to_cpu(ctx->elem->lpid));
        cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID);

        while (1) {
                slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA);
                if (!(slbia & CXL_TLB_SLB_P))
                        break;
                cpu_relax();
        }
}

static int do_process_element_cmd(struct cxl_context *ctx,
                                  u64 cmd, u64 pe_state)
{
        u64 state;

        WARN_ON(!ctx->afu->enabled);

        ctx->elem->software_state = cpu_to_be32(pe_state);
        smp_wmb();
        *(ctx->afu->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe);
        smp_mb();
        cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
        while (1) {
                state = be64_to_cpup(ctx->afu->sw_command_status);
                if (state == ~0ULL) {
                        pr_err("cxl: Error adding process element to AFU\n");
                        return -1;
                }
                if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK | CXL_SPA_SW_LINK_MASK)) ==
                    (cmd | (cmd >> 16) | ctx->pe))
                        break;
                /*
                 * The command won't finish in the PSL if there are
                 * outstanding DSIs. Hence we need to yield here in
                 * case there are outstanding DSIs that we need to
                 * service. Tuning possibility: we could wait for a
                 * while before scheduling.
                 */
                schedule();
        }
        return 0;
}

static int add_process_element(struct cxl_context *ctx)
{
        int rc = 0;

        mutex_lock(&ctx->afu->spa_mutex);
        pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe);
        if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V)))
                ctx->pe_inserted = true;
        pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe);
        mutex_unlock(&ctx->afu->spa_mutex);
        return rc;
}

static int terminate_process_element(struct cxl_context *ctx)
{
        int rc = 0;

        /* fast path terminate if it's already invalid */
        if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V)))
                return rc;

        mutex_lock(&ctx->afu->spa_mutex);
        pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe);
        rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
                                    CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
        ctx->elem->software_state = 0;  /* Remove Valid bit */
        pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe);
        mutex_unlock(&ctx->afu->spa_mutex);
        return rc;
}

static int remove_process_element(struct cxl_context *ctx)
{
        int rc = 0;

        mutex_lock(&ctx->afu->spa_mutex);
        pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe);
        if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0)))
                ctx->pe_inserted = false;
        slb_invalid(ctx);
        pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe);
        mutex_unlock(&ctx->afu->spa_mutex);

        return rc;
}

static void assign_psn_space(struct cxl_context *ctx)
{
        if (!ctx->afu->pp_size || ctx->master) {
                ctx->psn_phys = ctx->afu->psn_phys;
                ctx->psn_size = ctx->afu->adapter->ps_size;
        } else {
                ctx->psn_phys = ctx->afu->psn_phys +
                        (ctx->afu->pp_offset + ctx->afu->pp_size * ctx->pe);
                ctx->psn_size = ctx->afu->pp_size;
        }
}

static int activate_afu_directed(struct cxl_afu *afu)
{
        int rc;

        dev_info(&afu->dev, "Activating AFU directed mode\n");

        if (alloc_spa(afu))
                return -ENOMEM;

        cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU);
        cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL);
        cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L);

        afu->current_mode = CXL_MODE_DIRECTED;
        afu->num_procs = afu->max_procs_virtualised;

        if ((rc = cxl_chardev_m_afu_add(afu)))
                return rc;

        if ((rc = cxl_sysfs_afu_m_add(afu)))
                goto err;

        if ((rc = cxl_chardev_s_afu_add(afu)))
                goto err1;

        return 0;
err1:
        cxl_sysfs_afu_m_remove(afu);
err:
        cxl_chardev_afu_remove(afu);
        return rc;
}

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define set_endian(sr) ((sr) |= CXL_PSL_SR_An_LE)
#else
#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE))
#endif

static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
{
        u64 sr;
        int r, result;

        assign_psn_space(ctx);

        ctx->elem->ctxtime = 0; /* disable */
        ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID));
        ctx->elem->haurp = 0; /* disable */
        ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1));

        sr = CXL_PSL_SR_An_SC;
        if (ctx->master)
                sr |= CXL_PSL_SR_An_MP;
        if (mfspr(SPRN_LPCR) & LPCR_TC)
                sr |= CXL_PSL_SR_An_TC;
        /* HV=0, PR=1, R=1 for userspace
         * For kernel contexts: this would need to change
         */
        sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
        set_endian(sr);
        sr &= ~(CXL_PSL_SR_An_HV);
        if (!test_tsk_thread_flag(current, TIF_32BIT))
                sr |= CXL_PSL_SR_An_SF;
        ctx->elem->common.pid = cpu_to_be32(current->pid);
        ctx->elem->common.tid = 0;
        ctx->elem->sr = cpu_to_be64(sr);

        ctx->elem->common.csrp = 0; /* disable */
        ctx->elem->common.aurp0 = 0; /* disable */
        ctx->elem->common.aurp1 = 0; /* disable */

        cxl_prefault(ctx, wed);

        ctx->elem->common.sstp0 = cpu_to_be64(ctx->sstp0);
        ctx->elem->common.sstp1 = cpu_to_be64(ctx->sstp1);

        for (r = 0; r < CXL_IRQ_RANGES; r++) {
                ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]);
                ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]);
        }

        ctx->elem->common.amr = cpu_to_be64(amr);
        ctx->elem->common.wed = cpu_to_be64(wed);

        /* first guy needs to enable */
        if ((result = afu_check_and_enable(ctx->afu)))
                return result;

        add_process_element(ctx);

        return 0;
}

static int deactivate_afu_directed(struct cxl_afu *afu)
{
        dev_info(&afu->dev, "Deactivating AFU directed mode\n");

        afu->current_mode = 0;
        afu->num_procs = 0;

        cxl_sysfs_afu_m_remove(afu);
        cxl_chardev_afu_remove(afu);

        cxl_afu_reset(afu);
        cxl_afu_disable(afu);
        cxl_psl_purge(afu);

        release_spa(afu);

        return 0;
}

static int activate_dedicated_process(struct cxl_afu *afu)
{
        dev_info(&afu->dev, "Activating dedicated process mode\n");

        cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process);

        cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0); /* disable */
        cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); /* disable */
        cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL);
        cxl_p1n_write(afu, CXL_PSL_LPID_An, mfspr(SPRN_LPID));
        cxl_p1n_write(afu, CXL_HAURP_An, 0); /* disable */
        cxl_p1n_write(afu, CXL_PSL_SDR_An, mfspr(SPRN_SDR1));

        cxl_p2n_write(afu, CXL_CSRP_An, 0); /* disable */
        cxl_p2n_write(afu, CXL_AURP0_An, 0); /* disable */
        cxl_p2n_write(afu, CXL_AURP1_An, 0); /* disable */

        afu->current_mode = CXL_MODE_DEDICATED;
        afu->num_procs = 1;

        return cxl_chardev_d_afu_add(afu);
}

static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr)
{
        struct cxl_afu *afu = ctx->afu;
        u64 sr;
        int rc;

        sr = CXL_PSL_SR_An_SC;
        set_endian(sr);
        if (ctx->master)
                sr |= CXL_PSL_SR_An_MP;
        if (mfspr(SPRN_LPCR) & LPCR_TC)
                sr |= CXL_PSL_SR_An_TC;
        sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
        if (!test_tsk_thread_flag(current, TIF_32BIT))
                sr |= CXL_PSL_SR_An_SF;
        cxl_p2n_write(afu, CXL_PSL_PID_TID_An, (u64)current->pid << 32);
        cxl_p1n_write(afu, CXL_PSL_SR_An, sr);

        if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1)))
                return rc;

        cxl_prefault(ctx, wed);

        cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An,
                      (((u64)ctx->irqs.offset[0] & 0xffff) << 48) |
                      (((u64)ctx->irqs.offset[1] & 0xffff) << 32) |
                      (((u64)ctx->irqs.offset[2] & 0xffff) << 16) |
                      ((u64)ctx->irqs.offset[3] & 0xffff));
        cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64)
                      (((u64)ctx->irqs.range[0] & 0xffff) << 48) |
                      (((u64)ctx->irqs.range[1] & 0xffff) << 32) |
                      (((u64)ctx->irqs.range[2] & 0xffff) << 16) |
                      ((u64)ctx->irqs.range[3] & 0xffff));

        cxl_p2n_write(afu, CXL_PSL_AMR_An, amr);

        /* master only context for dedicated */
        assign_psn_space(ctx);

        if ((rc = cxl_afu_reset(afu)))
                return rc;

        cxl_p2n_write(afu, CXL_PSL_WED_An, wed);

        return afu_enable(afu);
}

static int deactivate_dedicated_process(struct cxl_afu *afu)
{
        dev_info(&afu->dev, "Deactivating dedicated process mode\n");

        afu->current_mode = 0;
        afu->num_procs = 0;

        cxl_chardev_afu_remove(afu);

        return 0;
}

int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode)
{
        if (mode == CXL_MODE_DIRECTED)
                return deactivate_afu_directed(afu);
        if (mode == CXL_MODE_DEDICATED)
                return deactivate_dedicated_process(afu);
        return 0;
}

int cxl_afu_deactivate_mode(struct cxl_afu *afu)
{
        return _cxl_afu_deactivate_mode(afu, afu->current_mode);
}

int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
{
        if (!mode)
                return 0;
        if (!(mode & afu->modes_supported))
                return -EINVAL;

        if (mode == CXL_MODE_DIRECTED)
                return activate_afu_directed(afu);
        if (mode == CXL_MODE_DEDICATED)
                return activate_dedicated_process(afu);

        return -EINVAL;
}

int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
{
        ctx->kernel = kernel;
        if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
                return attach_afu_directed(ctx, wed, amr);

        if (ctx->afu->current_mode == CXL_MODE_DEDICATED)
                return attach_dedicated(ctx, wed, amr);

        return -EINVAL;
}

static inline int detach_process_native_dedicated(struct cxl_context *ctx)
{
        cxl_afu_reset(ctx->afu);
        cxl_afu_disable(ctx->afu);
        cxl_psl_purge(ctx->afu);
        return 0;
}

/*
 * TODO: handle the case where this is called from inside an rcu_read_lock(),
 * which may happen when we unbind the driver (i.e. cxl_context_detach_all()).
 * Terminate & remove take a mutex and call schedule(), which is not safe with
 * that lock held. We may need a version of do_process_element_cmd() that
 * handles outstanding page faults synchronously.
 */
static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
{
        if (!ctx->pe_inserted)
                return 0;
        if (terminate_process_element(ctx))
                return -1;
        if (remove_process_element(ctx))
                return -1;

        return 0;
}

int cxl_detach_process(struct cxl_context *ctx)
{
        if (ctx->afu->current_mode == CXL_MODE_DEDICATED)
                return detach_process_native_dedicated(ctx);

        return detach_process_native_afu_directed(ctx);
}

int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info)
{
        u64 pidtid;

        info->dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An);
        info->dar = cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An);
        info->dsr = cxl_p2n_read(ctx->afu, CXL_PSL_DSR_An);
        pidtid = cxl_p2n_read(ctx->afu, CXL_PSL_PID_TID_An);
        info->pid = pidtid >> 32;
        info->tid = pidtid & 0xffffffff;
        info->afu_err = cxl_p2n_read(ctx->afu, CXL_AFU_ERR_An);
        info->errstat = cxl_p2n_read(ctx->afu, CXL_PSL_ErrStat_An);

        return 0;
}

static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
{
        u64 dsisr;

        pr_devel("RECOVERING FROM PSL ERROR... (0x%.16llx)\n", errstat);

        /* Clear PSL_DSISR[PE] */
        dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
        cxl_p2n_write(afu, CXL_PSL_DSISR_An, dsisr & ~CXL_PSL_DSISR_An_PE);

        /* Write 1s to clear error status bits */
        cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat);
}

int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask)
{
        if (tfc)
                cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc);
        if (psl_reset_mask)
                recover_psl_err(ctx->afu, psl_reset_mask);

        return 0;
}

int cxl_check_error(struct cxl_afu *afu)
{
        return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL);
}