summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTerje Bergstrom <tbergstrom@nvidia.com>2014-10-16 08:15:11 -0400
committerDan Willemsen <dwillemsen@nvidia.com>2015-03-18 15:11:46 -0400
commit2eb6dcb4694c8b83e22c50d9fd4d3fdd85b93c46 (patch)
tree0a2d10c9873b81fd6a6821959874d4345cc6bfad
parentecc6f27fd13e7560d124faf67d114b93d47b73de (diff)
gpu: nvgpu: Implement 64k large page support
Implement support for the 64kB large page size. Add an API to create an address space via an IOCTL so that we can accept flags, and assign one flag for enabling the 64kB large page size. Also add APIs to set the per-context large page size. This is possible only on Maxwell, so return an error if the caller tries to set the large page size on Kepler. The default large page size is still 128kB.

Change-Id: I20b51c8f6d4a984acae8411ace3de9000c78e82f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/as_gk20a.c8
-rw-r--r--drivers/gpu/nvgpu/gk20a/as_gk20a.h2
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c4
-rw-r--r--drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c53
-rw-r--r--drivers/gpu/nvgpu/gk20a/fb_gk20a.c14
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h6
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c41
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h2
-rw-r--r--drivers/gpu/nvgpu/gk20a/platform_gk20a.h3
-rw-r--r--drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c5
-rw-r--r--drivers/gpu/nvgpu/gm20b/fb_gm20b.c9
-rw-r--r--drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h16
-rw-r--r--drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h20
-rw-r--r--drivers/gpu/nvgpu/gm20b/mm_gm20b.c21
-rw-r--r--include/uapi/linux/nvgpu.h10
15 files changed, 190 insertions, 24 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 5ca7c806..74d83a7d 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -37,8 +37,8 @@ static void release_as_share_id(struct gk20a_as *as, int id)
37 return; 37 return;
38} 38}
39 39
40static int gk20a_as_alloc_share(struct gk20a_as *as, 40int gk20a_as_alloc_share(struct gk20a_as *as,
41 struct gk20a_as_share **out) 41 u32 flags, struct gk20a_as_share **out)
42{ 42{
43 struct gk20a *g = gk20a_from_as(as); 43 struct gk20a *g = gk20a_from_as(as);
44 struct gk20a_as_share *as_share; 44 struct gk20a_as_share *as_share;
@@ -56,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as,
56 as_share->ref_cnt.counter = 1; 56 as_share->ref_cnt.counter = 1;
57 57
58 /* this will set as_share->vm. */ 58 /* this will set as_share->vm. */
59 err = g->ops.mm.vm_alloc_share(as_share); 59 err = g->ops.mm.vm_alloc_share(as_share, flags);
60 if (err) 60 if (err)
61 goto failed; 61 goto failed;
62 62
@@ -186,7 +186,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
186 return err; 186 return err;
187 } 187 }
188 188
189 err = gk20a_as_alloc_share(&g->as, &as_share); 189 err = gk20a_as_alloc_share(&g->as, 0, &as_share);
190 if (err) { 190 if (err) {
191 gk20a_dbg_fn("failed to alloc share"); 191 gk20a_dbg_fn("failed to alloc share");
192 gk20a_put_client(g); 192 gk20a_put_client(g);
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
index 457678ce..166000a8 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
@@ -42,5 +42,7 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share);
42int gk20a_as_dev_open(struct inode *inode, struct file *filp); 42int gk20a_as_dev_open(struct inode *inode, struct file *filp);
43int gk20a_as_dev_release(struct inode *inode, struct file *filp); 43int gk20a_as_dev_release(struct inode *inode, struct file *filp);
44long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 44long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
45int gk20a_as_alloc_share(struct gk20a_as *as,
46 u32 flags, struct gk20a_as_share **out);
45 47
46#endif 48#endif
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0e8eb497..bcc05079 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -119,6 +119,10 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
119 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), 119 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
120 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit))); 120 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
121 121
122 if (c->g->ops.mm.set_big_page_size)
123 c->g->ops.mm.set_big_page_size(c->g, inst_ptr,
124 c->vm->gmmu_page_sizes[gmmu_page_size_big]);
125
122 return 0; 126 return 0;
123} 127}
124 128
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index ca587d00..6969a3a7 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -16,6 +16,8 @@
16 16
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/cdev.h> 18#include <linux/cdev.h>
19#include <linux/file.h>
20#include <linux/anon_inodes.h>
19#include <uapi/linux/nvgpu.h> 21#include <uapi/linux/nvgpu.h>
20 22
21#include "gk20a.h" 23#include "gk20a.h"
@@ -148,6 +150,53 @@ static int gk20a_ctrl_mark_compressible_write(
148 return ret; 150 return ret;
149} 151}
150 152
153static int gk20a_ctrl_alloc_as(
154 struct gk20a *g,
155 struct nvgpu_alloc_as_args *args)
156{
157 struct platform_device *dev = g->dev;
158 struct gk20a_as_share *as_share;
159 int err;
160 int fd;
161 struct file *file;
162 char *name;
163
164 err = get_unused_fd_flags(O_RDWR);
165 if (err < 0)
166 return err;
167 fd = err;
168
169 name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
170 dev_name(&dev->dev), fd);
171
172 file = anon_inode_getfile(name, g->as.cdev.ops, NULL, O_RDWR);
173 kfree(name);
174 if (IS_ERR(file)) {
175 err = PTR_ERR(file);
176 goto clean_up;
177 }
178 fd_install(fd, file);
179
180 err = gk20a_get_client(g);
181 if (err)
182 goto clean_up;
183
184 err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share);
185 if (err)
186 goto clean_up_client;
187
188 file->private_data = as_share;
189
190 args->as_fd = fd;
191 return 0;
192
193clean_up_client:
194 gk20a_put_client(g);
195clean_up:
196 put_unused_fd(fd);
197 return err;
198}
199
151long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 200long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
152{ 201{
153 struct platform_device *dev = filp->private_data; 202 struct platform_device *dev = filp->private_data;
@@ -309,6 +358,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
309 err = gk20a_ctrl_mark_compressible_write(g, 358 err = gk20a_ctrl_mark_compressible_write(g,
310 (struct nvgpu_gpu_mark_compressible_write_args *)buf); 359 (struct nvgpu_gpu_mark_compressible_write_args *)buf);
311 break; 360 break;
361 case NVGPU_GPU_IOCTL_ALLOC_AS:
362 err = gk20a_ctrl_alloc_as(g,
363 (struct nvgpu_alloc_as_args *)buf);
364 break;
312 default: 365 default:
313 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); 366 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
314 err = -ENOTTY; 367 err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index 52f2db4d..d5b3fd87 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -18,6 +18,7 @@
18#include "gk20a.h" 18#include "gk20a.h"
19#include "kind_gk20a.h" 19#include "kind_gk20a.h"
20#include "hw_mc_gk20a.h" 20#include "hw_mc_gk20a.h"
21#include "hw_fb_gk20a.h"
21 22
22static void fb_gk20a_reset(struct gk20a *g) 23static void fb_gk20a_reset(struct gk20a *g)
23{ 24{
@@ -29,9 +30,22 @@ static void fb_gk20a_reset(struct gk20a *g)
29 | mc_enable_hub_enabled_f()); 30 | mc_enable_hub_enabled_f());
30} 31}
31 32
33static void gk20a_fb_set_mmu_page_size(struct gk20a *g)
34{
35 /* set large page size in fb */
36 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
37
38 fb_mmu_ctrl = (fb_mmu_ctrl &
39 ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
40 fb_mmu_ctrl_vm_pg_size_128kb_f();
41
42 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
43}
44
32void gk20a_init_fb(struct gpu_ops *gops) 45void gk20a_init_fb(struct gpu_ops *gops)
33{ 46{
34 gops->fb.reset = fb_gk20a_reset; 47 gops->fb.reset = fb_gk20a_reset;
48 gops->fb.set_mmu_page_size = gk20a_fb_set_mmu_page_size;
35 gk20a_init_uncompressed_kind_map(); 49 gk20a_init_uncompressed_kind_map();
36 gk20a_init_kind_attr(); 50 gk20a_init_kind_attr();
37} 51}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8ebf6711..04a4cf66 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -141,6 +141,7 @@ struct gpu_ops {
141 void (*reset)(struct gk20a *g); 141 void (*reset)(struct gk20a *g);
142 void (*init_uncompressed_kind_map)(struct gk20a *g); 142 void (*init_uncompressed_kind_map)(struct gk20a *g);
143 void (*init_kind_attr)(struct gk20a *g); 143 void (*init_kind_attr)(struct gk20a *g);
144 void (*set_mmu_page_size)(struct gk20a *g);
144 } fb; 145 } fb;
145 struct { 146 struct {
146 void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod); 147 void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod);
@@ -291,13 +292,16 @@ struct gpu_ops {
291 bool va_allocated, 292 bool va_allocated,
292 int rw_flag); 293 int rw_flag);
293 void (*vm_remove)(struct vm_gk20a *vm); 294 void (*vm_remove)(struct vm_gk20a *vm);
294 int (*vm_alloc_share)(struct gk20a_as_share *as_share); 295 int (*vm_alloc_share)(struct gk20a_as_share *as_share,
296 u32 flags);
295 int (*vm_bind_channel)(struct gk20a_as_share *as_share, 297 int (*vm_bind_channel)(struct gk20a_as_share *as_share,
296 struct channel_gk20a *ch); 298 struct channel_gk20a *ch);
297 int (*fb_flush)(struct gk20a *g); 299 int (*fb_flush)(struct gk20a *g);
298 void (*l2_invalidate)(struct gk20a *g); 300 void (*l2_invalidate)(struct gk20a *g);
299 void (*l2_flush)(struct gk20a *g, bool invalidate); 301 void (*l2_flush)(struct gk20a *g, bool invalidate);
300 void (*tlb_invalidate)(struct vm_gk20a *vm); 302 void (*tlb_invalidate)(struct vm_gk20a *vm);
303 void (*set_big_page_size)(struct gk20a *g,
304 void *inst_ptr, int size);
301 } mm; 305 } mm;
302 struct { 306 struct {
303 int (*prepare_ucode)(struct gk20a *g); 307 int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 70f4294b..e7fdb336 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -327,17 +327,7 @@ static int gk20a_init_mm_setup_hw(struct gk20a *g)
327 327
328 gk20a_dbg_fn(""); 328 gk20a_dbg_fn("");
329 329
330 /* set large page size in fb 330 g->ops.fb.set_mmu_page_size(g);
331 * note this is very early on, can we defer it ? */
332 {
333 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
334
335 fb_mmu_ctrl = (fb_mmu_ctrl &
336 ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
337 fb_mmu_ctrl_vm_pg_size_128kb_f();
338
339 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
340 }
341 331
342 inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a()); 332 inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
343 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); 333 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
@@ -2173,6 +2163,7 @@ void gk20a_vm_put(struct vm_gk20a *vm)
2173 2163
2174static int gk20a_init_vm(struct mm_gk20a *mm, 2164static int gk20a_init_vm(struct mm_gk20a *mm,
2175 struct vm_gk20a *vm, 2165 struct vm_gk20a *vm,
2166 u32 big_page_size,
2176 u64 low_hole, 2167 u64 low_hole,
2177 u64 aperture_size, 2168 u64 aperture_size,
2178 bool big_pages, 2169 bool big_pages,
@@ -2184,7 +2175,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
2184 size_t vma_size; 2175 size_t vma_size;
2185 2176
2186 /* note: keep the page sizes sorted lowest to highest here */ 2177 /* note: keep the page sizes sorted lowest to highest here */
2187 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; 2178 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };
2188 2179
2189 vm->mm = mm; 2180 vm->mm = mm;
2190 2181
@@ -2331,7 +2322,7 @@ clean_up_pdes:
2331} 2322}
2332 2323
2333/* address space interfaces for the gk20a module */ 2324/* address space interfaces for the gk20a module */
2334int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) 2325int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
2335{ 2326{
2336 struct gk20a_as *as = as_share->as; 2327 struct gk20a_as *as = as_share->as;
2337 struct gk20a *g = gk20a_from_as(as); 2328 struct gk20a *g = gk20a_from_as(as);
@@ -2351,8 +2342,15 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2351 vm->enable_ctag = true; 2342 vm->enable_ctag = true;
2352 2343
2353 snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); 2344 snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
2354 err = gk20a_init_vm(mm, vm, 2345
2355 SZ_128K << 10, mm->channel.size, true, name); 2346 if (big_page_size && !g->ops.mm.set_big_page_size)
2347 return -EINVAL;
2348 if (big_page_size == 0)
2349 big_page_size =
2350 gk20a_get_platform(g->dev)->default_big_page_size;
2351
2352 err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
2353 mm->channel.size, true, name);
2356 2354
2357 return 0; 2355 return 0;
2358} 2356}
@@ -2709,10 +2707,12 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2709 struct device *d = dev_from_gk20a(g); 2707 struct device *d = dev_from_gk20a(g);
2710 struct inst_desc *inst_block = &mm->bar1.inst_block; 2708 struct inst_desc *inst_block = &mm->bar1.inst_block;
2711 dma_addr_t iova; 2709 dma_addr_t iova;
2710 u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
2712 2711
2713 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; 2712 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
2714 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); 2713 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
2715 gk20a_init_vm(mm, vm, SZ_4K, mm->bar1.aperture_size, false, "bar1"); 2714 gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
2715 mm->bar1.aperture_size, false, "bar1");
2716 2716
2717 gk20a_dbg_info("pde pa=0x%llx", 2717 gk20a_dbg_info("pde pa=0x%llx",
2718 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); 2718 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
@@ -2761,6 +2761,9 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2761 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), 2761 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2762 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); 2762 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2763 2763
2764 if (g->ops.mm.set_big_page_size)
2765 g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size);
2766
2764 gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa); 2767 gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
2765 return 0; 2768 return 0;
2766 2769
@@ -2789,11 +2792,12 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
2789 struct device *d = dev_from_gk20a(g); 2792 struct device *d = dev_from_gk20a(g);
2790 struct inst_desc *inst_block = &mm->pmu.inst_block; 2793 struct inst_desc *inst_block = &mm->pmu.inst_block;
2791 dma_addr_t iova; 2794 dma_addr_t iova;
2795 u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
2792 2796
2793 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; 2797 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
2794 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); 2798 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
2795 2799
2796 gk20a_init_vm(mm, vm, 2800 gk20a_init_vm(mm, vm, big_page_size,
2797 SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system"); 2801 SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system");
2798 2802
2799 gk20a_dbg_info("pde pa=0x%llx", 2803 gk20a_dbg_info("pde pa=0x%llx",
@@ -2842,6 +2846,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
2842 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), 2846 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2843 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); 2847 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2844 2848
2849 if (g->ops.mm.set_big_page_size)
2850 g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size);
2851
2845 return 0; 2852 return 0;
2846 2853
2847clean_up_inst_block: 2854clean_up_inst_block:
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b28daef7..3f7042ee 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -512,7 +512,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
512/* vm-as interface */ 512/* vm-as interface */
513struct nvgpu_as_alloc_space_args; 513struct nvgpu_as_alloc_space_args;
514struct nvgpu_as_free_space_args; 514struct nvgpu_as_free_space_args;
515int gk20a_vm_alloc_share(struct gk20a_as_share *as_share); 515int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags);
516int gk20a_vm_release_share(struct gk20a_as_share *as_share); 516int gk20a_vm_release_share(struct gk20a_as_share *as_share);
517int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, 517int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
518 struct nvgpu_as_alloc_space_args *args); 518 struct nvgpu_as_alloc_space_args *args);
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index ce0209fb..aada1537 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -76,6 +76,9 @@ struct gk20a_platform {
76 /* Adaptative ELPG: true = enable flase = disable */ 76 /* Adaptative ELPG: true = enable flase = disable */
77 bool enable_aelpg; 77 bool enable_aelpg;
78 78
79 /* Default big page size 64K or 128K */
80 u32 default_big_page_size;
81
79 /* Initialize the platform interface of the gk20a driver. 82 /* Initialize the platform interface of the gk20a driver.
80 * 83 *
81 * The platform implementation of this function must 84 * The platform implementation of this function must
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index 5513ea43..ccbf932f 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -443,6 +443,7 @@ struct gk20a_platform t132_gk20a_tegra_platform = {
443 .enable_elpg = true, 443 .enable_elpg = true,
444 .enable_aelpg = true, 444 .enable_aelpg = true,
445 445
446 .default_big_page_size = SZ_128K,
446 447
447 .probe = gk20a_tegra_probe, 448 .probe = gk20a_tegra_probe,
448 .late_probe = gk20a_tegra_late_probe, 449 .late_probe = gk20a_tegra_late_probe,
@@ -480,6 +481,8 @@ struct gk20a_platform gk20a_tegra_platform = {
480 .enable_elpg = true, 481 .enable_elpg = true,
481 .enable_aelpg = true, 482 .enable_aelpg = true,
482 483
484 .default_big_page_size = SZ_128K,
485
483 .probe = gk20a_tegra_probe, 486 .probe = gk20a_tegra_probe,
484 .late_probe = gk20a_tegra_late_probe, 487 .late_probe = gk20a_tegra_late_probe,
485 488
@@ -517,6 +520,8 @@ struct gk20a_platform gm20b_tegra_platform = {
517 .enable_elpg = true, 520 .enable_elpg = true,
518 .enable_aelpg = true, 521 .enable_aelpg = true,
519 522
523 .default_big_page_size = SZ_128K,
524
520 .probe = gk20a_tegra_probe, 525 .probe = gk20a_tegra_probe,
521 .late_probe = gk20a_tegra_late_probe, 526 .late_probe = gk20a_tegra_late_probe,
522 527
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
index 34ad6418..a2aa81d8 100644
--- a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
@@ -82,9 +82,18 @@ void gm20b_init_kind_attr(void)
82 } 82 }
83} 83}
84 84
85static void gm20b_fb_set_mmu_page_size(struct gk20a *g)
86{
87 /* set large page size in fb */
88 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
89 fb_mmu_ctrl |= fb_mmu_ctrl_use_pdb_big_page_size_true_f();
90 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
91}
92
85void gm20b_init_fb(struct gpu_ops *gops) 93void gm20b_init_fb(struct gpu_ops *gops)
86{ 94{
87 gops->fb.init_fs_state = fb_gm20b_init_fs_state; 95 gops->fb.init_fs_state = fb_gm20b_init_fs_state;
96 gops->fb.set_mmu_page_size = gm20b_fb_set_mmu_page_size;
88 gm20b_init_uncompressed_kind_map(); 97 gm20b_init_uncompressed_kind_map();
89 gm20b_init_kind_attr(); 98 gm20b_init_kind_attr();
90} 99}
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
index 817e4fc4..7655d2a3 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
@@ -66,6 +66,10 @@ static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
66{ 66{
67 return 0x0; 67 return 0x0;
68} 68}
69static inline u32 fb_mmu_ctrl_vm_pg_size_64kb_f(void)
70{
71 return 0x1;
72}
69static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r) 73static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
70{ 74{
71 return (r >> 15) & 0x1; 75 return (r >> 15) & 0x1;
@@ -78,6 +82,18 @@ static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
78{ 82{
79 return (r >> 16) & 0xff; 83 return (r >> 16) & 0xff;
80} 84}
85static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_v(u32 r)
86{
87 return (r >> 11) & 0x1;
88}
89static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_true_f(void)
90{
91 return 0x800;
92}
93static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_false_f(void)
94{
95 return 0x0;
96}
81static inline u32 fb_priv_mmu_phy_secure_r(void) 97static inline u32 fb_priv_mmu_phy_secure_r(void)
82{ 98{
83 return 0x00100ce4; 99 return 0x00100ce4;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
index 6debecda..2e1df1d4 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
@@ -78,6 +78,26 @@ static inline u32 ram_in_page_dir_base_vol_true_f(void)
78{ 78{
79 return 0x4; 79 return 0x4;
80} 80}
81static inline u32 ram_in_big_page_size_f(u32 v)
82{
83 return (v & 0x1) << 11;
84}
85static inline u32 ram_in_big_page_size_m(void)
86{
87 return 0x1 << 11;
88}
89static inline u32 ram_in_big_page_size_w(void)
90{
91 return 128;
92}
93static inline u32 ram_in_big_page_size_128kb_f(void)
94{
95 return 0x0;
96}
97static inline u32 ram_in_big_page_size_64kb_f(void)
98{
99 return 0x800;
100}
81static inline u32 ram_in_page_dir_base_lo_f(u32 v) 101static inline u32 ram_in_page_dir_base_lo_f(u32 v)
82{ 102{
83 return (v & 0xfffff) << 12; 103 return (v & 0xfffff) << 12;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index b4622c0b..13e7859f 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -19,6 +19,7 @@
19#include "hw_gmmu_gm20b.h" 19#include "hw_gmmu_gm20b.h"
20#include "hw_fb_gm20b.h" 20#include "hw_fb_gm20b.h"
21#include "hw_gr_gm20b.h" 21#include "hw_gr_gm20b.h"
22#include "hw_ram_gm20b.h"
22 23
23static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, 24static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
24 enum gmmu_pgsz_gk20a pgsz_idx, 25 enum gmmu_pgsz_gk20a pgsz_idx,
@@ -259,6 +260,25 @@ bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g)
259 gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v(); 260 gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v();
260} 261}
261 262
263void gm20b_mm_set_big_page_size(struct gk20a *g, void *inst_ptr, int size)
264{
265 u32 val;
266
267 gk20a_dbg_fn("");
268
269 gk20a_dbg_info("big page size %d\n", size);
270 val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w());
271 val &= ~ram_in_big_page_size_m();
272
273 if (size == SZ_64K)
274 val |= ram_in_big_page_size_64kb_f();
275 else
276 val |= ram_in_big_page_size_128kb_f();
277
278 gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val);
279 gk20a_dbg_fn("done");
280}
281
262void gm20b_init_mm(struct gpu_ops *gops) 282void gm20b_init_mm(struct gpu_ops *gops)
263{ 283{
264 gops->mm.set_sparse = gm20b_vm_put_sparse; 284 gops->mm.set_sparse = gm20b_vm_put_sparse;
@@ -273,4 +293,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
273 gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; 293 gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
274 gops->mm.l2_flush = gk20a_mm_l2_flush; 294 gops->mm.l2_flush = gk20a_mm_l2_flush;
275 gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; 295 gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
296 gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
276} 297}
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 694c497c..c9e50b36 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -175,6 +175,12 @@ struct nvgpu_gpu_mark_compressible_write_args {
175 __u32 reserved[3]; /* must be zero */ 175 __u32 reserved[3]; /* must be zero */
176}; 176};
177 177
/*
 * Arguments for NVGPU_GPU_IOCTL_ALLOC_AS, which creates a new address space
 * and returns a file descriptor for it.
 */
178struct nvgpu_alloc_as_args {
/* in: big page size for the new address space; passed through to
 * gk20a_as_alloc_share(). 0 selects the platform's default_big_page_size;
 * a non-zero value is rejected with -EINVAL when the chip provides no
 * set_big_page_size operation. */
179 __u32 big_page_size;
/* out: file descriptor of the newly created address space */
180 __s32 as_fd;
181 __u64 reserved; /* must be zero */
182};
183
178#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ 184#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
179 _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) 185 _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
180#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ 186#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -189,9 +195,11 @@ struct nvgpu_gpu_mark_compressible_write_args {
189 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 6, struct nvgpu_gpu_prepare_compressible_read_args) 195 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 6, struct nvgpu_gpu_prepare_compressible_read_args)
190#define NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE \ 196#define NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE \
191 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 7, struct nvgpu_gpu_mark_compressible_write_args) 197 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 7, struct nvgpu_gpu_mark_compressible_write_args)
198#define NVGPU_GPU_IOCTL_ALLOC_AS \
199 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 8, struct nvgpu_alloc_as_args)
192 200
193#define NVGPU_GPU_IOCTL_LAST \ 201#define NVGPU_GPU_IOCTL_LAST \
194 _IOC_NR(NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE) 202 _IOC_NR(NVGPU_GPU_IOCTL_ALLOC_AS)
195#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ 203#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
196 sizeof(struct nvgpu_gpu_prepare_compressible_read_args) 204 sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
197 205