From 2eb6dcb4694c8b83e22c50d9fd4d3fdd85b93c46 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Thu, 16 Oct 2014 15:15:11 +0300 Subject: gpu: nvgpu: Implement 64k large page support Implement support for 64kB large page size. Add an API to create an address space via IOCTL so that we can accept flags, and assign one flag for enabling 64kB large page size. Also adds APIs to set per-context large page size. This is possible only on Maxwell, so return error if caller tries to set large page size on Kepler. Default large page size is still 128kB. Change-Id: I20b51c8f6d4a984acae8411ace3de9000c78e82f Signed-off-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/as_gk20a.c | 8 ++-- drivers/gpu/nvgpu/gk20a/as_gk20a.h | 2 + drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 4 ++ drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 53 ++++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/fb_gk20a.c | 14 +++++++ drivers/gpu/nvgpu/gk20a/gk20a.h | 6 ++- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 41 +++++++++++--------- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/platform_gk20a.h | 3 ++ drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 5 +++ drivers/gpu/nvgpu/gm20b/fb_gm20b.c | 9 +++++ drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h | 16 ++++++++ drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h | 20 ++++++++++ drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 21 ++++++++++ 14 files changed, 181 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 5ca7c806..74d83a7d 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c @@ -37,8 +37,8 @@ static void release_as_share_id(struct gk20a_as *as, int id) return; } -static int gk20a_as_alloc_share(struct gk20a_as *as, - struct gk20a_as_share **out) +int gk20a_as_alloc_share(struct gk20a_as *as, + u32 flags, struct gk20a_as_share **out) { struct gk20a *g = gk20a_from_as(as); struct gk20a_as_share *as_share; @@ -56,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as, as_share->ref_cnt.counter = 1; /* this will set as_share->vm. */ - err = g->ops.mm.vm_alloc_share(as_share); + err = g->ops.mm.vm_alloc_share(as_share, flags); if (err) goto failed; @@ -186,7 +186,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp) return err; } - err = gk20a_as_alloc_share(&g->as, &as_share); + err = gk20a_as_alloc_share(&g->as, 0, &as_share); if (err) { gk20a_dbg_fn("failed to alloc share"); gk20a_put_client(g); diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h index 457678ce..166000a8 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h @@ -42,5 +42,7 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share); int gk20a_as_dev_open(struct inode *inode, struct file *filp); int gk20a_as_dev_release(struct inode *inode, struct file *filp); long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +int gk20a_as_alloc_share(struct gk20a_as *as, + u32 flags, struct gk20a_as_share **out); #endif diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 0e8eb497..bcc05079 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -119,6 +119,10 @@ int channel_gk20a_commit_va(struct channel_gk20a *c) gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit))); + if (c->g->ops.mm.set_big_page_size) + c->g->ops.mm.set_big_page_size(c->g, inst_ptr, + c->vm->gmmu_page_sizes[gmmu_page_size_big]); + return 0; } diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index ca587d00..6969a3a7 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include "gk20a.h" @@ -148,6 +150,53 @@ static int gk20a_ctrl_mark_compressible_write( return ret; } +static int gk20a_ctrl_alloc_as( + struct gk20a *g, + struct nvgpu_alloc_as_args *args) +{ + struct platform_device *dev = g->dev; + struct gk20a_as_share *as_share; + int err; + int fd; + struct file *file; + char *name; + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + return err; + fd = err; + + name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d", + dev_name(&dev->dev), fd); + + file = anon_inode_getfile(name, g->as.cdev.ops, NULL, O_RDWR); + kfree(name); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + fd_install(fd, file); + + err = gk20a_get_client(g); + if (err) + goto clean_up; + + err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share); + if (err) + goto clean_up_client; + + file->private_data = as_share; + + args->as_fd = fd; + return 0; + +clean_up_client: + gk20a_put_client(g); +clean_up: + put_unused_fd(fd); + return err; +} + long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct platform_device *dev = filp->private_data; @@ -309,6 +358,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg err = gk20a_ctrl_mark_compressible_write(g, (struct nvgpu_gpu_mark_compressible_write_args *)buf); break; + case NVGPU_GPU_IOCTL_ALLOC_AS: + err = gk20a_ctrl_alloc_as(g, + (struct nvgpu_alloc_as_args *)buf); + break; default: dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); err = -ENOTTY; diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c index 52f2db4d..d5b3fd87 100644 --- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c @@ -18,6 +18,7 @@ #include "gk20a.h" #include "kind_gk20a.h" #include "hw_mc_gk20a.h" +#include "hw_fb_gk20a.h" static void fb_gk20a_reset(struct gk20a *g) { @@ -29,9 +30,22 @@ static void fb_gk20a_reset(struct gk20a *g) | mc_enable_hub_enabled_f()); } +static void gk20a_fb_set_mmu_page_size(struct gk20a *g) +{ + /* set large page size in fb */ + u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); + + fb_mmu_ctrl = (fb_mmu_ctrl & + ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) | + fb_mmu_ctrl_vm_pg_size_128kb_f(); + + gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); +} + void gk20a_init_fb(struct gpu_ops *gops) { gops->fb.reset = fb_gk20a_reset; + gops->fb.set_mmu_page_size = gk20a_fb_set_mmu_page_size; gk20a_init_uncompressed_kind_map(); gk20a_init_kind_attr(); } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 8ebf6711..04a4cf66 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -141,6 +141,7 @@ struct gpu_ops { void (*reset)(struct gk20a *g); void (*init_uncompressed_kind_map)(struct gk20a *g); void (*init_kind_attr)(struct gk20a *g); + void (*set_mmu_page_size)(struct gk20a *g); } fb; struct { void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod); @@ -291,13 +292,16 @@ struct gpu_ops { bool va_allocated, int rw_flag); void (*vm_remove)(struct vm_gk20a *vm); - int (*vm_alloc_share)(struct gk20a_as_share *as_share); + int (*vm_alloc_share)(struct gk20a_as_share *as_share, + u32 flags); int (*vm_bind_channel)(struct gk20a_as_share *as_share, struct channel_gk20a *ch); int (*fb_flush)(struct gk20a *g); void (*l2_invalidate)(struct gk20a *g); void (*l2_flush)(struct gk20a *g, bool invalidate); void (*tlb_invalidate)(struct vm_gk20a *vm); + void (*set_big_page_size)(struct gk20a *g, + void *inst_ptr, int size); } mm; struct { int (*prepare_ucode)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 70f4294b..e7fdb336 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -327,17 +327,7 @@ static int gk20a_init_mm_setup_hw(struct gk20a *g) gk20a_dbg_fn(""); - /* set large page size in fb - * note this is very early on, can we defer it ? */ - { - u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); - - fb_mmu_ctrl = (fb_mmu_ctrl & - ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) | - fb_mmu_ctrl_vm_pg_size_128kb_f(); - - gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); - } + g->ops.fb.set_mmu_page_size(g); inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a()); gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); @@ -2173,6 +2163,7 @@ void gk20a_vm_put(struct vm_gk20a *vm) static int gk20a_init_vm(struct mm_gk20a *mm, struct vm_gk20a *vm, + u32 big_page_size, u64 low_hole, u64 aperture_size, bool big_pages, @@ -2184,7 +2175,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm, size_t vma_size; /* note: keep the page sizes sorted lowest to highest here */ - u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; + u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; vm->mm = mm; @@ -2331,7 +2322,7 @@ clean_up_pdes: } /* address space interfaces for the gk20a module */ -int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) +int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size) { struct gk20a_as *as = as_share->as; struct gk20a *g = gk20a_from_as(as); @@ -2351,8 +2342,15 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) vm->enable_ctag = true; snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); - err = gk20a_init_vm(mm, vm, - SZ_128K << 10, mm->channel.size, true, name); + + if (big_page_size && !g->ops.mm.set_big_page_size) + return -EINVAL; + if (big_page_size == 0) + big_page_size = + gk20a_get_platform(g->dev)->default_big_page_size; + + err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, + mm->channel.size, true, name); return 0; } @@ -2709,10 +2707,12 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) struct device *d = dev_from_gk20a(g); struct inst_desc *inst_block = &mm->bar1.inst_block; dma_addr_t iova; + u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); - gk20a_init_vm(mm, vm, SZ_4K, mm->bar1.aperture_size, false, "bar1"); + gk20a_init_vm(mm, vm, big_page_size, SZ_4K, + mm->bar1.aperture_size, false, "bar1"); gk20a_dbg_info("pde pa=0x%llx", (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); @@ -2761,6 +2761,9 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); + if (g->ops.mm.set_big_page_size) + g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); + gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa); return 0; @@ -2789,11 +2792,12 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) struct device *d = dev_from_gk20a(g); struct inst_desc *inst_block = &mm->pmu.inst_block; dma_addr_t iova; + u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); - gk20a_init_vm(mm, vm, + gk20a_init_vm(mm, vm, big_page_size, SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system"); gk20a_dbg_info("pde pa=0x%llx", @@ -2842,6 +2846,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); + if (g->ops.mm.set_big_page_size) + g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); + return 0; clean_up_inst_block: diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b28daef7..3f7042ee 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -512,7 +512,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, /* vm-as interface */ struct nvgpu_as_alloc_space_args; struct nvgpu_as_free_space_args; -int gk20a_vm_alloc_share(struct gk20a_as_share *as_share); +int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags); int gk20a_vm_release_share(struct gk20a_as_share *as_share); int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, struct nvgpu_as_alloc_space_args *args); diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index ce0209fb..aada1537 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h @@ -76,6 +76,9 @@ struct gk20a_platform { /* Adaptative ELPG: true = enable flase = disable */ bool enable_aelpg; + /* Default big page size 64K or 128K */ + u32 default_big_page_size; + /* Initialize the platform interface of the gk20a driver. * * The platform implementation of this function must diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index 5513ea43..ccbf932f 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c @@ -443,6 +443,7 @@ struct gk20a_platform t132_gk20a_tegra_platform = { .enable_elpg = true, .enable_aelpg = true, + .default_big_page_size = SZ_128K, .probe = gk20a_tegra_probe, .late_probe = gk20a_tegra_late_probe, @@ -480,6 +481,8 @@ struct gk20a_platform gk20a_tegra_platform = { .enable_elpg = true, .enable_aelpg = true, + .default_big_page_size = SZ_128K, + .probe = gk20a_tegra_probe, .late_probe = gk20a_tegra_late_probe, @@ -517,6 +520,8 @@ struct gk20a_platform gm20b_tegra_platform = { .enable_elpg = true, .enable_aelpg = true, + .default_big_page_size = SZ_128K, + .probe = gk20a_tegra_probe, .late_probe = gk20a_tegra_late_probe, diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c index 34ad6418..a2aa81d8 100644 --- a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c @@ -82,9 +82,18 @@ void gm20b_init_kind_attr(void) } } +static void gm20b_fb_set_mmu_page_size(struct gk20a *g) +{ + /* set large page size in fb */ + u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); + fb_mmu_ctrl |= fb_mmu_ctrl_use_pdb_big_page_size_true_f(); + gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); +} + void gm20b_init_fb(struct gpu_ops *gops) { gops->fb.init_fs_state = fb_gm20b_init_fs_state; + gops->fb.set_mmu_page_size = gm20b_fb_set_mmu_page_size; gm20b_init_uncompressed_kind_map(); gm20b_init_kind_attr(); } diff --git a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h index 817e4fc4..7655d2a3 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h @@ -66,6 +66,10 @@ static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void) { return 0x0; } +static inline u32 fb_mmu_ctrl_vm_pg_size_64kb_f(void) +{ + return 0x1; +} static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r) { return (r >> 15) & 0x1; @@ -78,6 +82,18 @@ static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r) { return (r >> 16) & 0xff; } +static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_v(u32 r) +{ + return (r >> 11) & 0x1; +} +static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_true_f(void) +{ + return 0x800; +} +static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_false_f(void) +{ + return 0x0; +} static inline u32 fb_priv_mmu_phy_secure_r(void) { return 0x00100ce4; diff --git a/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h index 6debecda..2e1df1d4 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h @@ -78,6 +78,26 @@ static inline u32 ram_in_page_dir_base_vol_true_f(void) { return 0x4; } +static inline u32 ram_in_big_page_size_f(u32 v) +{ + return (v & 0x1) << 11; +} +static inline u32 ram_in_big_page_size_m(void) +{ + return 0x1 << 11; +} +static inline u32 ram_in_big_page_size_w(void) +{ + return 128; +} +static inline u32 ram_in_big_page_size_128kb_f(void) +{ + return 0x0; +} +static inline u32 ram_in_big_page_size_64kb_f(void) +{ + return 0x800; +} static inline u32 ram_in_page_dir_base_lo_f(u32 v) { return (v & 0xfffff) << 12; diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index b4622c0b..13e7859f 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -19,6 +19,7 @@ #include "hw_gmmu_gm20b.h" #include "hw_fb_gm20b.h" #include "hw_gr_gm20b.h" +#include "hw_ram_gm20b.h" static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, enum gmmu_pgsz_gk20a pgsz_idx, @@ -259,6 +260,25 @@ bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g) gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v(); } +void gm20b_mm_set_big_page_size(struct gk20a *g, void *inst_ptr, int size) +{ + u32 val; + + gk20a_dbg_fn(""); + + gk20a_dbg_info("big page size %d\n", size); + val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w()); + val &= ~ram_in_big_page_size_m(); + + if (size == SZ_64K) + val |= ram_in_big_page_size_64kb_f(); + else + val |= ram_in_big_page_size_128kb_f(); + + gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val); + gk20a_dbg_fn("done"); +} + void gm20b_init_mm(struct gpu_ops *gops) { gops->mm.set_sparse = gm20b_vm_put_sparse; @@ -273,4 +293,5 @@ void gm20b_init_mm(struct gpu_ops *gops) gops->mm.l2_invalidate = gk20a_mm_l2_invalidate; gops->mm.l2_flush = gk20a_mm_l2_flush; gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; + gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; } -- cgit v1.2.2