author	Terje Bergstrom <tbergstrom@nvidia.com>	2015-10-09 14:53:19 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-10-22 12:27:30 -0400
commit	37255d42cc1eee1dc1de94bd651461a46c8afbe9 (patch)
tree	fa78d6b0e7caa739474d1a7dc82f6412148ad542 /drivers
parent	fb3a1d31cd84771b659050ea1aa920bdf06ffb04 (diff)
gpu: nvgpu: vgpu: Alloc kernel address space
JIRA VFND-890

Change-Id: I8eba041b663cead94f2cc3d75d6458d472f1a755
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/815378
(cherry picked from commit 4b52329e955758ec4368abcb463ce4e3a2653237)
Reviewed-on: http://git-master/r/820499
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/gpu/nvgpu/vgpu/gr_vgpu.c	19
-rw-r--r--	drivers/gpu/nvgpu/vgpu/mm_vgpu.c	137
2 files changed, 120 insertions(+), 36 deletions(-)
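This patch reserves a kernel-owned region at the top of each vGPU address space: vgpu_vm_alloc_share() now sizes the aperture as user_size + kernel_size, carves it into a small-page VMA, an optional big-page VMA, and a kernel VMA at the end, and the context-buffer allocations in gr_vgpu.c switch to the new gmmu_page_size_kernel index. A minimal standalone C sketch of the resulting layout computation follows; the sizes are illustrative placeholders, not values taken from the driver.

/*
 * Sketch (illustrative sizes only) of the VA layout set up by this patch:
 * small pages first, big pages in the middle when enabled, and a
 * kernel-reserved region at the very end of the aperture.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical sizes; the real ones come from platform data */
	uint64_t big_page_size    = 128 << 10;           /* 128 KB big pages */
	uint64_t va_start         = big_page_size << 10; /* one-PDE hole */
	uint64_t user_size        = 1ULL << 37;          /* hypothetical */
	uint64_t kernel_size      = 1ULL << 32;          /* hypothetical */
	uint64_t va_limit         = user_size + kernel_size;
	uint64_t small_page_limit = 16ULL << 30;         /* first 16 GB */
	int big_pages = 1;

	uint64_t small_start = va_start, small_limit;
	uint64_t large_start = 0, large_limit = 0;

	if (big_pages) {
		small_limit = small_page_limit;
		large_start = small_limit;
		large_limit = va_limit - kernel_size;
	} else {
		small_limit = va_limit - kernel_size;
	}

	/* kernel-reserved VMA sits at the top of the aperture */
	uint64_t kernel_start = va_limit - kernel_size;
	uint64_t kernel_limit = va_limit;

	printf("small =[0x%llx,0x%llx)\n", (unsigned long long)small_start,
	       (unsigned long long)small_limit);
	printf("large =[0x%llx,0x%llx)\n", (unsigned long long)large_start,
	       (unsigned long long)large_limit);
	printf("kernel=[0x%llx,0x%llx)\n", (unsigned long long)kernel_start,
	       (unsigned long long)kernel_limit);
	return 0;
}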
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index b259a0c3..a4ec5254 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -144,7 +144,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 
 	/* Circular Buffer */
 	gpu_va = gk20a_vm_alloc_va(ch_vm,
-			gr->global_ctx_buffer[CIRCULAR].mem.size, 0);
+			gr->global_ctx_buffer[CIRCULAR].mem.size,
+			gmmu_page_size_kernel);
 
 	if (!gpu_va)
 		goto clean_up;
@@ -153,7 +154,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 
 	/* Attribute Buffer */
 	gpu_va = gk20a_vm_alloc_va(ch_vm,
-			gr->global_ctx_buffer[ATTRIBUTE].mem.size, 0);
+			gr->global_ctx_buffer[ATTRIBUTE].mem.size,
+			gmmu_page_size_kernel);
 
 	if (!gpu_va)
 		goto clean_up;
@@ -162,7 +164,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 
 	/* Page Pool */
 	gpu_va = gk20a_vm_alloc_va(ch_vm,
-			gr->global_ctx_buffer[PAGEPOOL].mem.size, 0);
+			gr->global_ctx_buffer[PAGEPOOL].mem.size,
+			gmmu_page_size_kernel);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[PAGEPOOL_VA] = gpu_va;
@@ -170,7 +173,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 
 	/* Priv register Access Map */
 	gpu_va = gk20a_vm_alloc_va(ch_vm,
-			gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size, 0);
+			gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size,
+			gmmu_page_size_kernel);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
@@ -257,7 +261,9 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 
 	gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
-	gr_ctx->mem.gpu_va = gk20a_vm_alloc_va(vm, gr_ctx->mem.size, 0);
+	gr_ctx->mem.gpu_va = gk20a_vm_alloc_va(vm,
+					gr_ctx->mem.size,
+					gmmu_page_size_kernel);
 
 	if (!gr_ctx->mem.gpu_va) {
 		kfree(gr_ctx);
@@ -351,7 +357,8 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 
 	patch_ctx->mem.size = 128 * sizeof(u32);
 	patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm,
-					patch_ctx->mem.size, 0);
+					patch_ctx->mem.size,
+					gmmu_page_size_kernel);
 	if (!patch_ctx->mem.gpu_va)
 		return -ENOMEM;
 
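In mm_vgpu.c below, vgpu_locked_gmmu_map() gains a small resolution step: when a caller passes the new gmmu_page_size_kernel index, it is mapped back to the small or big index whose configured page size matches, presumably because the map request forwarded to the server only deals in the hardware page-size indices. A standalone sketch of that resolution, using hypothetical page-size values rather than the driver's tables:

/*
 * Sketch of resolving the synthetic "kernel" page-size index to the small
 * or big hardware index before a map request is sent. The sizes below are
 * placeholders, not the driver's gmmu_page_sizes table.
 */
#include <stdio.h>

enum gmmu_pgsz { PGSZ_SMALL, PGSZ_BIG, PGSZ_KERNEL, PGSZ_NR };

static const unsigned int page_sizes[PGSZ_NR] = {
	[PGSZ_SMALL]  = 4 << 10,	/* 4 KB */
	[PGSZ_BIG]    = 128 << 10,	/* 128 KB */
	[PGSZ_KERNEL] = 4 << 10,	/* kernel VMA aliases the 4 KB size */
};

/* returns the resolved index, or -1 if the kernel size matches neither */
static int resolve_pgsz(int pgsz_idx)
{
	if (pgsz_idx != PGSZ_KERNEL)
		return pgsz_idx;
	if (page_sizes[PGSZ_KERNEL] == page_sizes[PGSZ_SMALL])
		return PGSZ_SMALL;
	if (page_sizes[PGSZ_KERNEL] == page_sizes[PGSZ_BIG])
		return PGSZ_BIG;
	return -1;	/* invalid kernel page size */
}

int main(void)
{
	printf("kernel index resolves to %d\n", resolve_pgsz(PGSZ_KERNEL));
	return 0;
}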
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index b5846043..c36b135c 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -99,7 +99,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 		map_offset = gk20a_vm_alloc_va(vm, size,
 					pgsz_idx);
 		if (!map_offset) {
-			gk20a_err(d, "failed to allocate va space");
+			gk20a_err(d, "failed to allocate va space\n");
 			err = -ENOMEM;
 			goto fail;
 		}
@@ -118,6 +118,20 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	p->addr = addr;
 	p->gpu_va = map_offset;
 	p->size = size;
+	if (pgsz_idx == gmmu_page_size_kernel) {
+		u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+
+		if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small]) {
+			pgsz_idx = gmmu_page_size_small;
+		} else if (page_size ==
+				vm->gmmu_page_sizes[gmmu_page_size_big]) {
+			pgsz_idx = gmmu_page_size_big;
+		} else {
+			gk20a_err(d, "invalid kernel page size %d\n",
+				page_size);
+			goto fail;
+		}
+	}
 	p->pgsz_idx = pgsz_idx;
 	p->iova = mapping ? 1 : 0;
 	p->kind = kind_v;
@@ -127,7 +141,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	p->ctag_offset = ctag_offset;
 	p->clear_ctags = clear_ctags;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-	if (err || msg.ret)
+	err = err ? err : msg.ret;
+	if (err)
 		goto fail;
 
 	/* TLB invalidate handled on server side */
@@ -214,8 +229,11 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]);
+	if (vm->vma[gmmu_page_size_small].init)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (vm->vma[gmmu_page_size_big].init)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 
 	mutex_unlock(&vm->update_gmmu_lock);
 
@@ -258,14 +276,16 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
-	u64 small_vma_size, large_vma_size;
+	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
+		kernel_vma_start, kernel_vma_limit;
 	char name[32];
 	int err, i;
 
 	/* note: keep the page sizes sorted lowest to highest here */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = {
 		SZ_4K,
-		big_page_size ? big_page_size : platform->default_big_page_size
+		big_page_size ? big_page_size : platform->default_big_page_size,
+		SZ_4K
 	};
 
 	gk20a_dbg_fn("");
@@ -288,8 +308,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->big_page_size = big_page_size;
 
 	vm->va_start = big_page_size << 10; /* create a one pde hole */
-	vm->va_limit = mm->channel.user_size; /* note this means channel.size
-					is really just the max */
+	vm->va_limit = mm->channel.user_size + mm->channel.kernel_size;
 
 	msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
 	msg.handle = platform->virt_handle;
@@ -303,34 +322,88 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 	vm->handle = p->handle;
 
-	/* First 16GB of the address space goes towards small pages. What ever
-	 * remains is allocated to large pages. */
-	small_vma_size = (u64)16 << 30;
-	large_vma_size = vm->va_limit - small_vma_size;
+	/* setup vma limits */
+	small_vma_start = vm->va_start;
+
+	if (vm->big_pages) {
+		/* First 16GB of the address space goes towards small
+		 * pages. The kernel reserved pages are at the end.
+		 * What ever remains is allocated to large pages.
+		 */
+		small_vma_limit = __nv_gmmu_va_small_page_limit();
+		large_vma_start = small_vma_limit;
+		large_vma_limit = vm->va_limit - mm->channel.kernel_size;
+	} else {
+		small_vma_limit = vm->va_limit - mm->channel.kernel_size;
+		large_vma_start = 0;
+		large_vma_limit = 0;
+	}
 
-	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-		 gmmu_page_sizes[gmmu_page_size_small]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-			vm, name,
-			vm->va_start,
-			small_vma_size - vm->va_start,
-			SZ_4K,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_BALLOC_GVA_SPACE);
-	if (err)
+	kernel_vma_start = vm->va_limit - mm->channel.kernel_size;
+	kernel_vma_limit = vm->va_limit;
+
+	gk20a_dbg_info(
+		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		small_vma_start, small_vma_limit,
+		large_vma_start, large_vma_limit,
+		kernel_vma_start, kernel_vma_limit);
+
+	/* check that starts do not exceed limits */
+	WARN_ON(small_vma_start > small_vma_limit);
+	WARN_ON(large_vma_start > large_vma_limit);
+	/* kernel_vma must also be non-zero */
+	WARN_ON(kernel_vma_start >= kernel_vma_limit);
+
+	if (small_vma_start > small_vma_limit ||
+	    large_vma_start > large_vma_limit ||
+	    kernel_vma_start >= kernel_vma_limit) {
+		err = -EINVAL;
 		goto clean_up_share;
+	}
+
+	if (small_vma_start < small_vma_limit) {
+		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
+			 gmmu_page_sizes[gmmu_page_size_small] >> 10);
+
+		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+				vm, name,
+				small_vma_start,
+				small_vma_limit - small_vma_start,
+				SZ_4K,
+				GPU_BALLOC_MAX_ORDER,
+				GPU_BALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_share;
+	}
+
+	if (large_vma_start < large_vma_limit) {
+		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
+			 gmmu_page_sizes[gmmu_page_size_big] >> 10);
+		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+				vm, name,
+				large_vma_start,
+				large_vma_limit - large_vma_start,
+				big_page_size,
+				GPU_BALLOC_MAX_ORDER,
+				GPU_BALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_small_allocator;
+	}
 
-	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-		 gmmu_page_sizes[gmmu_page_size_big]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+	snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
+		 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+	/*
+	 * kernel reserved VMA is at the end of the aperture
	 */
+	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel],
 			vm, name,
-			small_vma_size,
-			large_vma_size,
-			big_page_size,
+			kernel_vma_start,
+			kernel_vma_limit - kernel_vma_start,
+			SZ_4K,
 			GPU_BALLOC_MAX_ORDER,
 			GPU_BALLOC_GVA_SPACE);
 	if (err)
-		goto clean_up_small_allocator;
+		goto clean_up_big_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -342,8 +415,12 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 	return 0;
 
+clean_up_big_allocator:
+	if (large_vma_start < large_vma_limit)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 clean_up_small_allocator:
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (small_vma_start < small_vma_limit)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 clean_up_share:
 	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
 	msg.handle = platform->virt_handle;
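The error paths added above follow the usual kernel goto-unwind pattern: later labels fall through to earlier ones, and each teardown is guarded by the same condition that created the resource, since the big-page allocator may not exist at all. A generic, self-contained sketch of that pattern, with placeholder resources rather than the driver's allocators:

/*
 * Generic goto-unwind sketch mirroring the clean_up_* labels added by this
 * patch. The "allocators" here are just flags and printf calls.
 */
#include <stdio.h>

/* fail_at picks which step "fails", so each unwind label gets exercised */
static int setup(int want_big, int fail_at)
{
	int have_small = 0, have_big = 0;

	have_small = 1;				/* small/user allocator */
	if (fail_at == 1)
		goto clean_up_small_allocator;

	if (want_big) {
		if (fail_at == 2)		/* big allocator init failed */
			goto clean_up_small_allocator;
		have_big = 1;			/* big-page allocator, optional */
	}

	if (fail_at == 3)			/* kernel allocator init failed */
		goto clean_up_big_allocator;

	return 0;

clean_up_big_allocator:
	if (have_big)
		printf("destroy big allocator\n");
clean_up_small_allocator:
	if (have_small)
		printf("destroy small allocator\n");
	printf("free AS share\n");
	return -1;
}

int main(void)
{
	setup(1, 3);	/* fails after the big allocator: unwinds big, then small */
	return 0;
}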