-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c              8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.h              2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c         4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c           53
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fb_gk20a.c             14
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c             41
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h              2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a.h        3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c  5
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fb_gm20b.c              9
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h          16
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h         20
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c             21
-rw-r--r--  include/uapi/linux/nvgpu.h                     10
15 files changed, 190 insertions, 24 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 5ca7c806..74d83a7d 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -37,8 +37,8 @@ static void release_as_share_id(struct gk20a_as *as, int id)
 	return;
 }
 
-static int gk20a_as_alloc_share(struct gk20a_as *as,
-		struct gk20a_as_share **out)
+int gk20a_as_alloc_share(struct gk20a_as *as,
+		u32 flags, struct gk20a_as_share **out)
 {
 	struct gk20a *g = gk20a_from_as(as);
 	struct gk20a_as_share *as_share;
@@ -56,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as,
 	as_share->ref_cnt.counter = 1;
 
 	/* this will set as_share->vm. */
-	err = g->ops.mm.vm_alloc_share(as_share);
+	err = g->ops.mm.vm_alloc_share(as_share, flags);
 	if (err)
 		goto failed;
 
@@ -186,7 +186,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
 		return err;
 	}
 
-	err = gk20a_as_alloc_share(&g->as, &as_share);
+	err = gk20a_as_alloc_share(&g->as, 0, &as_share);
 	if (err) {
 		gk20a_dbg_fn("failed to alloc share");
 		gk20a_put_client(g);
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
index 457678ce..166000a8 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
@@ -42,5 +42,7 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share);
 int gk20a_as_dev_open(struct inode *inode, struct file *filp);
 int gk20a_as_dev_release(struct inode *inode, struct file *filp);
 long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+int gk20a_as_alloc_share(struct gk20a_as *as,
+		u32 flags, struct gk20a_as_share **out);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0e8eb497..bcc05079 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -119,6 +119,10 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
 
+	if (c->g->ops.mm.set_big_page_size)
+		c->g->ops.mm.set_big_page_size(c->g, inst_ptr,
+				c->vm->gmmu_page_sizes[gmmu_page_size_big]);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index ca587d00..6969a3a7 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -16,6 +16,8 @@
 
 #include <linux/highmem.h>
 #include <linux/cdev.h>
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
 #include <uapi/linux/nvgpu.h>
 
 #include "gk20a.h"
@@ -148,6 +150,53 @@ static int gk20a_ctrl_mark_compressible_write(
 	return ret;
 }
 
+static int gk20a_ctrl_alloc_as(
+		struct gk20a *g,
+		struct nvgpu_alloc_as_args *args)
+{
+	struct platform_device *dev = g->dev;
+	struct gk20a_as_share *as_share;
+	int err;
+	int fd;
+	struct file *file;
+	char *name;
+
+	err = get_unused_fd_flags(O_RDWR);
+	if (err < 0)
+		return err;
+	fd = err;
+
+	name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
+			dev_name(&dev->dev), fd);
+
+	file = anon_inode_getfile(name, g->as.cdev.ops, NULL, O_RDWR);
+	kfree(name);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto clean_up;
+	}
+	fd_install(fd, file);
+
+	err = gk20a_get_client(g);
+	if (err)
+		goto clean_up;
+
+	err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share);
+	if (err)
+		goto clean_up_client;
+
+	file->private_data = as_share;
+
+	args->as_fd = fd;
+	return 0;
+
+clean_up_client:
+	gk20a_put_client(g);
+clean_up:
+	put_unused_fd(fd);
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -309,6 +358,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_ctrl_mark_compressible_write(g,
 			(struct nvgpu_gpu_mark_compressible_write_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_ALLOC_AS:
+		err = gk20a_ctrl_alloc_as(g,
+			(struct nvgpu_alloc_as_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index 52f2db4d..d5b3fd87 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -18,6 +18,7 @@
 #include "gk20a.h"
 #include "kind_gk20a.h"
 #include "hw_mc_gk20a.h"
+#include "hw_fb_gk20a.h"
 
 static void fb_gk20a_reset(struct gk20a *g)
 {
@@ -29,9 +30,22 @@ static void fb_gk20a_reset(struct gk20a *g)
 		| mc_enable_hub_enabled_f());
 }
 
+static void gk20a_fb_set_mmu_page_size(struct gk20a *g)
+{
+	/* set large page size in fb */
+	u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
+
+	fb_mmu_ctrl = (fb_mmu_ctrl &
+		~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
+		fb_mmu_ctrl_vm_pg_size_128kb_f();
+
+	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
+}
+
 void gk20a_init_fb(struct gpu_ops *gops)
 {
 	gops->fb.reset = fb_gk20a_reset;
+	gops->fb.set_mmu_page_size = gk20a_fb_set_mmu_page_size;
 	gk20a_init_uncompressed_kind_map();
 	gk20a_init_kind_attr();
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8ebf6711..04a4cf66 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -141,6 +141,7 @@ struct gpu_ops {
 		void (*reset)(struct gk20a *g);
 		void (*init_uncompressed_kind_map)(struct gk20a *g);
 		void (*init_kind_attr)(struct gk20a *g);
+		void (*set_mmu_page_size)(struct gk20a *g);
 	} fb;
 	struct {
 		void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod);
@@ -291,13 +292,16 @@ struct gpu_ops {
 				bool va_allocated,
 				int rw_flag);
 		void (*vm_remove)(struct vm_gk20a *vm);
-		int (*vm_alloc_share)(struct gk20a_as_share *as_share);
+		int (*vm_alloc_share)(struct gk20a_as_share *as_share,
+				u32 flags);
 		int (*vm_bind_channel)(struct gk20a_as_share *as_share,
 				struct channel_gk20a *ch);
 		int (*fb_flush)(struct gk20a *g);
 		void (*l2_invalidate)(struct gk20a *g);
 		void (*l2_flush)(struct gk20a *g, bool invalidate);
 		void (*tlb_invalidate)(struct vm_gk20a *vm);
+		void (*set_big_page_size)(struct gk20a *g,
+				void *inst_ptr, int size);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 70f4294b..e7fdb336 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -327,17 +327,7 @@ static int gk20a_init_mm_setup_hw(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
-	/* set large page size in fb
-	 * note this is very early on, can we defer it ? */
-	{
-		u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
-
-		fb_mmu_ctrl = (fb_mmu_ctrl &
-			~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
-			fb_mmu_ctrl_vm_pg_size_128kb_f();
-
-		gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
-	}
+	g->ops.fb.set_mmu_page_size(g);
 
 	inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
 	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
@@ -2173,6 +2163,7 @@ void gk20a_vm_put(struct vm_gk20a *vm)
 
 static int gk20a_init_vm(struct mm_gk20a *mm,
 		struct vm_gk20a *vm,
+		u32 big_page_size,
 		u64 low_hole,
 		u64 aperture_size,
 		bool big_pages,
@@ -2184,7 +2175,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 	size_t vma_size;
 
 	/* note: keep the page sizes sorted lowest to highest here */
-	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
+	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };
 
 	vm->mm = mm;
 
@@ -2331,7 +2322,7 @@ clean_up_pdes:
 }
 
 /* address space interfaces for the gk20a module */
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 {
 	struct gk20a_as *as = as_share->as;
 	struct gk20a *g = gk20a_from_as(as);
@@ -2351,8 +2342,15 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
 	vm->enable_ctag = true;
 
 	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
-	err = gk20a_init_vm(mm, vm,
-		SZ_128K << 10, mm->channel.size, true, name);
+
+	if (big_page_size && !g->ops.mm.set_big_page_size)
+		return -EINVAL;
+	if (big_page_size == 0)
+		big_page_size =
+			gk20a_get_platform(g->dev)->default_big_page_size;
+
+	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
+			mm->channel.size, true, name);
 
 	return 0;
 }
@@ -2709,10 +2707,12 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	struct device *d = dev_from_gk20a(g);
 	struct inst_desc *inst_block = &mm->bar1.inst_block;
 	dma_addr_t iova;
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	gk20a_init_vm(mm, vm, SZ_4K, mm->bar1.aperture_size, false, "bar1");
+	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
+		mm->bar1.aperture_size, false, "bar1");
 
 	gk20a_dbg_info("pde pa=0x%llx",
 			(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
@@ -2761,6 +2761,9 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
 
+	if (g->ops.mm.set_big_page_size)
+		g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size);
+
 	gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
 	return 0;
 
@@ -2789,11 +2792,12 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	struct device *d = dev_from_gk20a(g);
 	struct inst_desc *inst_block = &mm->pmu.inst_block;
 	dma_addr_t iova;
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
-	gk20a_init_vm(mm, vm,
+	gk20a_init_vm(mm, vm, big_page_size,
 		SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system");
 
 	gk20a_dbg_info("pde pa=0x%llx",
@@ -2842,6 +2846,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
 
+	if (g->ops.mm.set_big_page_size)
+		g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size);
+
 	return 0;
 
 clean_up_inst_block:
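
The gk20a_vm_alloc_share() change above makes the reserved low VA hole a function of the selected big page size (big_page_size << 10) instead of the hard-coded SZ_128K << 10. A minimal standalone C sketch of that arithmetic, not part of the patch, just to show the resulting hole sizes:

/* Illustration only: low hole scales with the big page size. */
#include <stdio.h>

int main(void)
{
	unsigned int sizes[] = { 64 << 10, 128 << 10 };	/* SZ_64K, SZ_128K */
	int i;

	for (i = 0; i < 2; i++)
		printf("big page %u KiB -> low hole %u MiB\n",
		       sizes[i] >> 10, (sizes[i] << 10) >> 20);
	/* prints: 64 KiB -> 64 MiB, 128 KiB -> 128 MiB */
	return 0;
}
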
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b28daef7..3f7042ee 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -512,7 +512,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 /* vm-as interface */
 struct nvgpu_as_alloc_space_args;
 struct nvgpu_as_free_space_args;
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share);
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags);
 int gk20a_vm_release_share(struct gk20a_as_share *as_share);
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 			 struct nvgpu_as_alloc_space_args *args);
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index ce0209fb..aada1537 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -76,6 +76,9 @@ struct gk20a_platform {
 	/* Adaptative ELPG: true = enable flase = disable */
 	bool enable_aelpg;
 
+	/* Default big page size 64K or 128K */
+	u32 default_big_page_size;
+
 	/* Initialize the platform interface of the gk20a driver.
 	 *
 	 * The platform implementation of this function must
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index 5513ea43..ccbf932f 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -443,6 +443,7 @@ struct gk20a_platform t132_gk20a_tegra_platform = {
 	.enable_elpg = true,
 	.enable_aelpg = true,
 
+	.default_big_page_size = SZ_128K,
 
 	.probe = gk20a_tegra_probe,
 	.late_probe = gk20a_tegra_late_probe,
@@ -480,6 +481,8 @@ struct gk20a_platform gk20a_tegra_platform = {
 	.enable_elpg = true,
 	.enable_aelpg = true,
 
+	.default_big_page_size = SZ_128K,
+
 	.probe = gk20a_tegra_probe,
 	.late_probe = gk20a_tegra_late_probe,
 
@@ -517,6 +520,8 @@ struct gk20a_platform gm20b_tegra_platform = {
 	.enable_elpg = true,
 	.enable_aelpg = true,
 
+	.default_big_page_size = SZ_128K,
+
 	.probe = gk20a_tegra_probe,
 	.late_probe = gk20a_tegra_late_probe,
 
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
index 34ad6418..a2aa81d8 100644
--- a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
@@ -82,9 +82,18 @@ void gm20b_init_kind_attr(void)
 	}
 }
 
+static void gm20b_fb_set_mmu_page_size(struct gk20a *g)
+{
+	/* set large page size in fb */
+	u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
+	fb_mmu_ctrl |= fb_mmu_ctrl_use_pdb_big_page_size_true_f();
+	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
+}
+
 void gm20b_init_fb(struct gpu_ops *gops)
 {
 	gops->fb.init_fs_state = fb_gm20b_init_fs_state;
+	gops->fb.set_mmu_page_size = gm20b_fb_set_mmu_page_size;
 	gm20b_init_uncompressed_kind_map();
 	gm20b_init_kind_attr();
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
index 817e4fc4..7655d2a3 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
@@ -66,6 +66,10 @@ static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
 {
 	return 0x0;
 }
+static inline u32 fb_mmu_ctrl_vm_pg_size_64kb_f(void)
+{
+	return 0x1;
+}
 static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
 {
 	return (r >> 15) & 0x1;
@@ -78,6 +82,18 @@ static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
 {
 	return (r >> 16) & 0xff;
 }
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_true_f(void)
+{
+	return 0x800;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_false_f(void)
+{
+	return 0x0;
+}
 static inline u32 fb_priv_mmu_phy_secure_r(void)
 {
 	return 0x00100ce4;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
index 6debecda..2e1df1d4 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
@@ -78,6 +78,26 @@ static inline u32 ram_in_page_dir_base_vol_true_f(void)
 {
 	return 0x4;
 }
+static inline u32 ram_in_big_page_size_f(u32 v)
+{
+	return (v & 0x1) << 11;
+}
+static inline u32 ram_in_big_page_size_m(void)
+{
+	return 0x1 << 11;
+}
+static inline u32 ram_in_big_page_size_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_big_page_size_128kb_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_big_page_size_64kb_f(void)
+{
+	return 0x800;
+}
 static inline u32 ram_in_page_dir_base_lo_f(u32 v)
 {
 	return (v & 0xfffff) << 12;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index b4622c0b..13e7859f 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -19,6 +19,7 @@
 #include "hw_gmmu_gm20b.h"
 #include "hw_fb_gm20b.h"
 #include "hw_gr_gm20b.h"
+#include "hw_ram_gm20b.h"
 
 static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 				enum gmmu_pgsz_gk20a pgsz_idx,
@@ -259,6 +260,25 @@ bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g)
 		gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v();
 }
 
+void gm20b_mm_set_big_page_size(struct gk20a *g, void *inst_ptr, int size)
+{
+	u32 val;
+
+	gk20a_dbg_fn("");
+
+	gk20a_dbg_info("big page size %d\n", size);
+	val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w());
+	val &= ~ram_in_big_page_size_m();
+
+	if (size == SZ_64K)
+		val |= ram_in_big_page_size_64kb_f();
+	else
+		val |= ram_in_big_page_size_128kb_f();
+
+	gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val);
+	gk20a_dbg_fn("done");
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
@@ -273,4 +293,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
+	gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
 }
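
For reference, gm20b_mm_set_big_page_size() above only touches bit 11 of instance-block word ram_in_big_page_size_w() (word 128): a clear bit selects 128KB big pages, 0x800 selects 64KB. A self-contained sketch of that bit manipulation, not part of the patch, using the raw values from hw_ram_gm20b.h:

/* Illustration of the big-page-size field encoding in the instance block. */
#include <assert.h>
#include <stdint.h>

static uint32_t set_big_page_bit(uint32_t word, unsigned int size_kb)
{
	word &= ~(0x1u << 11);		/* ram_in_big_page_size_m() */
	if (size_kb == 64)
		word |= 0x800;		/* ram_in_big_page_size_64kb_f() */
	return word;			/* 128KB keeps the bit clear */
}

int main(void)
{
	assert(set_big_page_bit(0, 64) == 0x800);
	assert(set_big_page_bit(0x800, 128) == 0);
	return 0;
}
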
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 694c497c..c9e50b36 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -175,6 +175,12 @@ struct nvgpu_gpu_mark_compressible_write_args {
 	__u32 reserved[3]; /* must be zero */
 };
 
+struct nvgpu_alloc_as_args {
+	__u32 big_page_size;
+	__s32 as_fd;
+	__u64 reserved;	/* must be zero */
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -189,9 +195,11 @@ struct nvgpu_gpu_mark_compressible_write_args {
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 6, struct nvgpu_gpu_prepare_compressible_read_args)
 #define NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 7, struct nvgpu_gpu_mark_compressible_write_args)
+#define NVGPU_GPU_IOCTL_ALLOC_AS \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 8, struct nvgpu_alloc_as_args)
 
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE)
+	_IOC_NR(NVGPU_GPU_IOCTL_ALLOC_AS)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
 
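
A hypothetical userspace sketch of the new NVGPU_GPU_IOCTL_ALLOC_AS ioctl defined above; the control-node path (/dev/nvhost-ctrl-gpu) and the <linux/nvgpu.h> include are assumptions and may differ per platform. Passing big_page_size = 0 falls back to the platform default (SZ_128K on the Tegra boards above):

/* Userspace sketch only: allocate an address-space share with 64KB big pages. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_alloc_as_args args = {
		.big_page_size = 64 * 1024,	/* 0 selects the platform default */
	};
	int ctrl = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (ctrl < 0)
		return 1;
	if (ioctl(ctrl, NVGPU_GPU_IOCTL_ALLOC_AS, &args) < 0) {
		perror("NVGPU_GPU_IOCTL_ALLOC_AS");
		close(ctrl);
		return 1;
	}
	/* args.as_fd now refers to an address-space share, usable like an
	 * fd obtained by opening the as device node directly. */
	printf("as fd: %d\n", args.as_fd);
	close(ctrl);
	return 0;
}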