12 files changed, 67 insertions, 23 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index afbad75c..a5790e3a 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -628,7 +628,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
                   "vm=%s "
                   "%-5s GPU virt %#-12llx +%#-9llx    phys %#-12llx "
                   "phys offset: %#-4llx;  pgsz: %3dkb perm=%-2s | "
-                   "kind=%#02x APT=%-6s %c%c%c%c%c",
+                   "kind=%#02x APT=%-6s %c%c%c%c%c%c",
                   vm->name,
                   (sgt != NULL) ? "MAP" : "UNMAP",
                   virt_addr,
@@ -643,7 +643,8 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
                   attrs->sparse    ? 'S' : '-',
                   attrs->priv      ? 'P' : '-',
                   attrs->coherent  ? 'I' : '-',
-                   attrs->valid     ? 'V' : '-');
+                   attrs->valid     ? 'V' : '-',
+                   attrs->platform_atomic ? 'A' : '-');
        err = __nvgpu_gmmu_do_update_page_table(vm,
                                                sgt,
@@ -702,7 +703,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
                .priv      = priv,
                .coherent  = flags & NVGPU_VM_MAP_IO_COHERENT,
                .valid     = (flags & NVGPU_VM_MAP_UNMAPPED_PTE) == 0U,
-                .aperture  = aperture
+                .aperture  = aperture,
+                .platform_atomic = (flags & NVGPU_VM_MAP_PLATFORM_ATOMIC) != 0U
        };
        /*
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 8f6bd66e..2326e0ae 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -31,7 +31,7 @@
 * will not add any checks. If you want to simply use the default coherency then
 * use nvgpu_aperture_mask().
 */
-u32 nvgpu_aperture_mask_coh(struct gk20a *g, enum nvgpu_aperture aperture,
+u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture,
                            u32 sysmem_mask, u32 sysmem_coh_mask,
                            u32 vidmem_mask)
 {
@@ -71,7 +71,7 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
                ap = APERTURE_SYSMEM_COH;
        }
-        return nvgpu_aperture_mask_coh(g, ap,
+        return nvgpu_aperture_mask_raw(g, ap,
                                       sysmem_mask,
                                       sysmem_coh_mask,
                                       vidmem_mask);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 644531f1..18922d46 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -222,7 +222,7 @@ static void __update_pte(struct vm_gk20a *vm,
                pte_w[0] |= gmmu_pte_privilege_true_f();
        }
-        pte_w[1] = nvgpu_aperture_mask_coh(g, attrs->aperture,
+        pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture,
                                         gmmu_pte_aperture_sys_mem_ncoh_f(),
                                         gmmu_pte_aperture_sys_mem_coh_f(),
                                         gmmu_pte_aperture_video_memory_f()) |
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 2c0056e1..a4b291d8 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -1,7 +1,7 @@
 /*
 * GP10B MMU
 *
- * Copyright (c) 2014-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -78,6 +78,32 @@ clean_up_va:
        return err;
 }
+/*
+ * On GV11B and TU104 the MSS NVLINK HW runs in force_snoop mode, which forces
+ * all GPU mappings to be coherent. The memory aperture defaults to
+ * sysmem_non_coherent and uses L2 mode. When NVGPU_SUPPORT_PLATFORM_ATOMIC is
+ * enabled and platform_atomic_attr is set, mem_ap is overridden with
+ * APERTURE_SYSMEM_COH (whatever its incoming value) so the PTE targets
+ * sysmem_coherent and the mapping can use the rmw atomic capability.
+ */
+static u32 gmmu_aperture_mask(struct gk20a *g,
+                                  enum nvgpu_aperture mem_ap,
+                                  bool platform_atomic_attr,
+                                  u32 sysmem_mask,
+                                  u32 sysmem_coh_mask,
+                                  u32 vidmem_mask)
+{
+        if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC) &&
+                             platform_atomic_attr) {
+                mem_ap = APERTURE_SYSMEM_COH;
+        }
+        return nvgpu_aperture_mask_raw(g, mem_ap,
+                                sysmem_mask,
+                                sysmem_coh_mask,
+                                vidmem_mask);
+}
 static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
                                    const struct gk20a_mmu_level *l,
                                    struct nvgpu_gmmu_pd *pd,
@@ -191,8 +217,9 @@ static void __update_pte(struct vm_gk20a *vm,
        u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ?
                gmmu_new_pte_address_sys_f(phys_shifted) :
                gmmu_new_pte_address_vid_f(phys_shifted);
-        u32 pte_tgt = nvgpu_aperture_mask_coh(g,
+        u32 pte_tgt = gmmu_aperture_mask(g,
                                        attrs->aperture,
+                                        attrs->platform_atomic,
                                        gmmu_new_pte_aperture_sys_mem_ncoh_f(),
                                        gmmu_new_pte_aperture_sys_mem_coh_f(),
                                        gmmu_new_pte_aperture_video_memory_f());
@@ -253,7 +280,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
                "vm=%s "
                "PTE: i=%-4u size=%-2u | "
                "GPU %#-12llx  phys %#-12llx "
-                "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c "
+                "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c%c "
                "ctag=0x%08x "
                "[0x%08x, 0x%08x]",
                vm->name,
@@ -268,6 +295,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
                attrs->priv      ? 'P' : '-',
                attrs->coherent  ? 'I' : '-',
                attrs->valid     ? 'V' : '-',
+                attrs->platform_atomic ? 'A' : '-',
                (u32)attrs->ctag / g->ops.fb.compression_page_size(g),
                pte_w[1], pte_w[0]);
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 4f298133..68ea78a6 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -1,7 +1,7 @@
 /*
 * GV11B Tegra HAL interface
 *
- * Copyright (c) 2016-2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -957,6 +957,7 @@ int gv11b_init_hal(struct gk20a *g)
        __nvgpu_set_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
+        __nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
        g->name = "gv11b";
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 76f0f2bd..fc7bab90 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -178,10 +178,13 @@ struct gk20a;
 /* NVGPU_GPU_IOCTL_GET_GPU_LOAD is available */
 #define NVGPU_SUPPORT_GET_GPU_LOAD      70
+/* PLATFORM_ATOMIC support */
+#define NVGPU_SUPPORT_PLATFORM_ATOMIC           71
 /*
 * Must be greater than the largest bit offset in the above list.
 */
-#define NVGPU_MAX_ENABLED_BITS                  71
+#define NVGPU_MAX_ENABLED_BITS                  72
 /**
 * nvgpu_is_enabled - Check if the passed flag is enabled.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index a70015f8..66cb5218 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -155,6 +155,7 @@ struct nvgpu_gmmu_pd {
 *   valid:       Set if the PTE should be marked valid.
 *   aperture:    VIDMEM or SYSMEM.
 *   debug:       When set print debugging info.
+ *   platform_atomic: Set if the map flags requested platform atomics.
 *
 * These fields are dynamically updated as necessary during the map:
 *
@@ -173,8 +174,8 @@ struct nvgpu_gmmu_attrs {
        bool                     valid;
        enum nvgpu_aperture      aperture;
        bool                     debug;
        bool                     l3_alloc;
+        bool                     platform_atomic;
 };
 struct gk20a_mmu_level {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 8e62a1d6..4e84f2af 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -349,7 +349,7 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
 u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
 u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);
-u32 nvgpu_aperture_mask_coh(struct gk20a *g, enum nvgpu_aperture aperture,
+u32 nvgpu_aperture_mask_raw(struct gk20a *g, enum nvgpu_aperture aperture,
                u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
 u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
                u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index 66749717..3867c745 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -213,6 +213,7 @@ struct vm_gk20a {
 #define NVGPU_VM_MAP_UNMAPPED_PTE                       BIT32(3)
 #define NVGPU_VM_MAP_DIRECT_KIND_CTRL                   BIT32(4)
 #define NVGPU_VM_MAP_L3_ALLOC                           BIT32(5)
+#define NVGPU_VM_MAP_PLATFORM_ATOMIC                    BIT32(6)
 #define NVGPU_KIND_INVALID                              -1
diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c
index 27a91cf6..dc807ab6 100644
--- a/drivers/gpu/nvgpu/os/linux/vm.c
+++ b/drivers/gpu/nvgpu/os/linux/vm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -54,6 +54,8 @@ static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags)
                core_flags |= NVGPU_VM_MAP_L3_ALLOC;
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)
                core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL;
+        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC)
+                core_flags |= NVGPU_VM_MAP_PLATFORM_ATOMIC;
        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS)
                nvgpu_warn(g, "Ignoring deprecated flag: "
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_gv11b.c
index 1bcd151a..baddae13 100644
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_gv11b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,7 @@ int vgpu_gv11b_init_gpu_characteristics(struct gk20a *g)
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_SCG, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS, true);
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true);
+        __nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
        return 0;
 }
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 0488e563..9197011b 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1,7 +1,7 @@
 /*
 * NVGPU Public Interface Header
 *
- * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -1894,6 +1894,7 @@ struct nvgpu_as_bind_channel_args {
 #define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6)
 #define NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC          (1 << 7)
 #define NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL  (1 << 8)
+#define NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC   (1 << 9)
 /*
 * VM map buffer IOCTL
@@ -1940,6 +1941,10 @@ struct nvgpu_as_bind_channel_args {
 *     Set when userspace plans to pass in @compr_kind and @incompr_kind
 *     instead of letting the kernel work out kind fields.
 *
+ *   %NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC
+ *
+ *     Specify that a mapping should use platform atomics.
+ *
 * @kind  [IN]
 *
 *   Specify the kind to use for the mapping.