diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-05-12 02:31:30 -0400 |
---|---|---|
committer | Deepak Nibade <dnibade@nvidia.com> | 2016-12-27 04:56:15 -0500 |
commit | 18a017865946617fd63256858a0d2300160643f4 (patch) | |
tree | 310822095e53bb0bb377f4955df7e2cc0f62fc7d /drivers/gpu | |
parent | e746a16f7abbaacba89e390c692620941fc1b34c (diff) |
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor
functions. This allows the functions to select the memory access method
based on the buffer aperture instead of always using the CPU pointer
directly, as they have done until now. The selection logic and aperture
support will be added in a separate patch; this patch only refactors the
accessors and keeps the underlying functionality as-is.
JIRA DNVGPU-23
Change-Id: I21d4a54827b0e2741012dfde7952c0555a583435
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121914
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams <kadams@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 61 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 78 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 44 |
3 files changed, 91 insertions, 92 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c index 9cb26d3f..4766e0e4 100644 --- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | |||
@@ -25,24 +25,24 @@ | |||
25 | #include "hw_ram_gp10b.h" | 25 | #include "hw_ram_gp10b.h" |
26 | 26 | ||
27 | static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g, | 27 | static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g, |
28 | void *inst_ptr) | 28 | struct mem_desc *mem) |
29 | { | 29 | { |
30 | u32 val; | 30 | u32 val; |
31 | 31 | ||
32 | gk20a_dbg_fn(""); | 32 | gk20a_dbg_fn(""); |
33 | 33 | ||
34 | val = gk20a_mem_rd32(inst_ptr, | 34 | val = gk20a_mem_rd32(g, mem, |
35 | ram_in_page_dir_base_fault_replay_tex_w()); | 35 | ram_in_page_dir_base_fault_replay_tex_w()); |
36 | val &= ~ram_in_page_dir_base_fault_replay_tex_m(); | 36 | val &= ~ram_in_page_dir_base_fault_replay_tex_m(); |
37 | val |= ram_in_page_dir_base_fault_replay_tex_true_f(); | 37 | val |= ram_in_page_dir_base_fault_replay_tex_true_f(); |
38 | gk20a_mem_wr32(inst_ptr, | 38 | gk20a_mem_wr32(g, mem, |
39 | ram_in_page_dir_base_fault_replay_tex_w(), val); | 39 | ram_in_page_dir_base_fault_replay_tex_w(), val); |
40 | 40 | ||
41 | val = gk20a_mem_rd32(inst_ptr, | 41 | val = gk20a_mem_rd32(g, mem, |
42 | ram_in_page_dir_base_fault_replay_gcc_w()); | 42 | ram_in_page_dir_base_fault_replay_gcc_w()); |
43 | val &= ~ram_in_page_dir_base_fault_replay_gcc_m(); | 43 | val &= ~ram_in_page_dir_base_fault_replay_gcc_m(); |
44 | val |= ram_in_page_dir_base_fault_replay_gcc_true_f(); | 44 | val |= ram_in_page_dir_base_fault_replay_gcc_true_f(); |
45 | gk20a_mem_wr32(inst_ptr, | 45 | gk20a_mem_wr32(g, mem, |
46 | ram_in_page_dir_base_fault_replay_gcc_w(), val); | 46 | ram_in_page_dir_base_fault_replay_gcc_w(), val); |
47 | 47 | ||
48 | gk20a_dbg_fn("done"); | 48 | gk20a_dbg_fn("done"); |
@@ -52,28 +52,25 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c) | |||
52 | { | 52 | { |
53 | u32 addr_lo; | 53 | u32 addr_lo; |
54 | u32 addr_hi; | 54 | u32 addr_hi; |
55 | void *inst_ptr; | ||
56 | struct gk20a *g = c->g; | 55 | struct gk20a *g = c->g; |
57 | 56 | ||
58 | gk20a_dbg_fn(""); | 57 | gk20a_dbg_fn(""); |
59 | 58 | ||
60 | inst_ptr = c->inst_block.cpu_va; | ||
61 | if (!inst_ptr) | ||
62 | return -ENOMEM; | ||
63 | |||
64 | addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); | 59 | addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); |
65 | addr_hi = u64_hi32(c->userd_iova); | 60 | addr_hi = u64_hi32(c->userd_iova); |
66 | 61 | ||
67 | gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", | 62 | gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", |
68 | c->hw_chid, (u64)c->userd_iova); | 63 | c->hw_chid, (u64)c->userd_iova); |
69 | 64 | ||
70 | gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(), | 65 | gk20a_mem_wr32(g, &c->inst_block, |
66 | ram_in_ramfc_w() + ram_fc_userd_w(), | ||
71 | (g->mm.vidmem_is_vidmem ? | 67 | (g->mm.vidmem_is_vidmem ? |
72 | pbdma_userd_target_sys_mem_ncoh_f() : | 68 | pbdma_userd_target_sys_mem_ncoh_f() : |
73 | pbdma_userd_target_vid_mem_f()) | | 69 | pbdma_userd_target_vid_mem_f()) | |
74 | pbdma_userd_addr_f(addr_lo)); | 70 | pbdma_userd_addr_f(addr_lo)); |
75 | 71 | ||
76 | gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(), | 72 | gk20a_mem_wr32(g, &c->inst_block, |
73 | ram_in_ramfc_w() + ram_fc_userd_hi_w(), | ||
77 | pbdma_userd_hi_addr_f(addr_hi)); | 74 | pbdma_userd_hi_addr_f(addr_hi)); |
78 | 75 | ||
79 | return 0; | 76 | return 0; |
@@ -82,33 +79,30 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c) | |||
82 | static int channel_gp10b_setup_ramfc(struct channel_gk20a *c, | 79 | static int channel_gp10b_setup_ramfc(struct channel_gk20a *c, |
83 | u64 gpfifo_base, u32 gpfifo_entries, u32 flags) | 80 | u64 gpfifo_base, u32 gpfifo_entries, u32 flags) |
84 | { | 81 | { |
85 | void *inst_ptr; | 82 | struct gk20a *g = c->g; |
83 | struct mem_desc *mem = &c->inst_block; | ||
86 | 84 | ||
87 | gk20a_dbg_fn(""); | 85 | gk20a_dbg_fn(""); |
88 | 86 | ||
89 | inst_ptr = c->inst_block.cpu_va; | 87 | gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v()); |
90 | if (!inst_ptr) | ||
91 | return -ENOMEM; | ||
92 | 88 | ||
93 | memset(inst_ptr, 0, ram_fc_size_val_v()); | 89 | gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(), |
94 | |||
95 | gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), | ||
96 | pbdma_gp_base_offset_f( | 90 | pbdma_gp_base_offset_f( |
97 | u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); | 91 | u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); |
98 | 92 | ||
99 | gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), | 93 | gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(), |
100 | pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | | 94 | pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | |
101 | pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); | 95 | pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); |
102 | 96 | ||
103 | gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), | 97 | gk20a_mem_wr32(g, mem, ram_fc_signature_w(), |
104 | c->g->ops.fifo.get_pbdma_signature(c->g)); | 98 | c->g->ops.fifo.get_pbdma_signature(c->g)); |
105 | 99 | ||
106 | gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), | 100 | gk20a_mem_wr32(g, mem, ram_fc_formats_w(), |
107 | pbdma_formats_gp_fermi0_f() | | 101 | pbdma_formats_gp_fermi0_f() | |
108 | pbdma_formats_pb_fermi1_f() | | 102 | pbdma_formats_pb_fermi1_f() | |
109 | pbdma_formats_mp_fermi0_f()); | 103 | pbdma_formats_mp_fermi0_f()); |
110 | 104 | ||
111 | gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), | 105 | gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(), |
112 | pbdma_pb_header_priv_user_f() | | 106 | pbdma_pb_header_priv_user_f() | |
113 | pbdma_pb_header_method_zero_f() | | 107 | pbdma_pb_header_method_zero_f() | |
114 | pbdma_pb_header_subchannel_zero_f() | | 108 | pbdma_pb_header_subchannel_zero_f() | |
@@ -116,26 +110,26 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c, | |||
116 | pbdma_pb_header_first_true_f() | | 110 | pbdma_pb_header_first_true_f() | |
117 | pbdma_pb_header_type_inc_f()); | 111 | pbdma_pb_header_type_inc_f()); |
118 | 112 | ||
119 | gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), | 113 | gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(), |
120 | pbdma_subdevice_id_f(1) | | 114 | pbdma_subdevice_id_f(1) | |
121 | pbdma_subdevice_status_active_f() | | 115 | pbdma_subdevice_status_active_f() | |
122 | pbdma_subdevice_channel_dma_enable_f()); | 116 | pbdma_subdevice_channel_dma_enable_f()); |
123 | 117 | ||
124 | gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f()); | 118 | gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f()); |
125 | 119 | ||
126 | gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), | 120 | gk20a_mem_wr32(g, mem, ram_fc_acquire_w(), |
127 | channel_gk20a_pbdma_acquire_val(c)); | 121 | channel_gk20a_pbdma_acquire_val(c)); |
128 | 122 | ||
129 | gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), | 123 | gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(), |
130 | pbdma_runlist_timeslice_timeout_128_f() | | 124 | pbdma_runlist_timeslice_timeout_128_f() | |
131 | pbdma_runlist_timeslice_timescale_3_f() | | 125 | pbdma_runlist_timeslice_timescale_3_f() | |
132 | pbdma_runlist_timeslice_enable_true_f()); | 126 | pbdma_runlist_timeslice_enable_true_f()); |
133 | 127 | ||
134 | if ( flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE) | 128 | if ( flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE) |
135 | gp10b_set_pdb_fault_replay_flags(c->g, inst_ptr); | 129 | gp10b_set_pdb_fault_replay_flags(c->g, mem); |
136 | 130 | ||
137 | 131 | ||
138 | gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); | 132 | gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); |
139 | 133 | ||
140 | return channel_gp10b_commit_userd(c); | 134 | return channel_gp10b_commit_userd(c); |
141 | } | 135 | } |
@@ -149,14 +143,12 @@ static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g) | |||
149 | static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c) | 143 | static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c) |
150 | { | 144 | { |
151 | u32 new_syncpt = 0, old_syncpt; | 145 | u32 new_syncpt = 0, old_syncpt; |
152 | void *inst_ptr; | ||
153 | u32 v; | 146 | u32 v; |
154 | 147 | ||
155 | gk20a_dbg_fn(""); | 148 | gk20a_dbg_fn(""); |
156 | 149 | ||
157 | inst_ptr = c->inst_block.cpu_va; | 150 | v = gk20a_mem_rd32(c->g, &c->inst_block, |
158 | 151 | ram_fc_allowed_syncpoints_w()); | |
159 | v = gk20a_mem_rd32(inst_ptr, ram_fc_allowed_syncpoints_w()); | ||
160 | old_syncpt = pbdma_allowed_syncpoints_0_index_v(v); | 152 | old_syncpt = pbdma_allowed_syncpoints_0_index_v(v); |
161 | if (c->sync) | 153 | if (c->sync) |
162 | new_syncpt = c->sync->syncpt_id(c->sync); | 154 | new_syncpt = c->sync->syncpt_id(c->sync); |
@@ -175,7 +167,8 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c) | |||
175 | 167 | ||
176 | v |= pbdma_allowed_syncpoints_0_index_f(new_syncpt); | 168 | v |= pbdma_allowed_syncpoints_0_index_f(new_syncpt); |
177 | 169 | ||
178 | gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), v); | 170 | gk20a_mem_wr32(c->g, &c->inst_block, |
171 | ram_fc_allowed_syncpoints_w(), v); | ||
179 | } | 172 | } |
180 | 173 | ||
181 | /* enable channel */ | 174 | /* enable channel */ |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index b36eff8f..07f1014f 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -958,52 +958,51 @@ fail_free_gk20a_ctx: | |||
958 | } | 958 | } |
959 | 959 | ||
960 | static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, | 960 | static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm, |
961 | struct gr_ctx_desc *gr_ctx) { | 961 | struct gr_ctx_desc *gr_ctx) |
962 | void *ctx_ptr = vmap(gr_ctx->mem.pages, | 962 | { |
963 | PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT, | 963 | struct mem_desc *mem = &gr_ctx->mem; |
964 | 0, pgprot_writecombine(PAGE_KERNEL)); | 964 | |
965 | if (!ctx_ptr) { | 965 | if (gk20a_mem_begin(g, mem)) { |
966 | WARN_ON("Cannot map context"); | 966 | WARN_ON("Cannot map context"); |
967 | return; | 967 | return; |
968 | } | 968 | } |
969 | gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n", | 969 | gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n", |
970 | gk20a_mem_rd32(ctx_ptr + | 970 | gk20a_mem_rd(g, mem, |
971 | ctxsw_prog_main_image_magic_value_o(), 0), | 971 | ctxsw_prog_main_image_magic_value_o()), |
972 | ctxsw_prog_main_image_magic_value_v_value_v()); | 972 | ctxsw_prog_main_image_magic_value_v_value_v()); |
973 | 973 | ||
974 | gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n", | 974 | gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n", |
975 | gk20a_mem_rd32(ctx_ptr + | 975 | gk20a_mem_rd(g, mem, |
976 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0)); | 976 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o())); |
977 | 977 | ||
978 | gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n", | 978 | gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n", |
979 | gk20a_mem_rd32(ctx_ptr + | 979 | gk20a_mem_rd(g, mem, |
980 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0)); | 980 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_o())); |
981 | 981 | ||
982 | gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n", | 982 | gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n", |
983 | gk20a_mem_rd32(ctx_ptr + | 983 | gk20a_mem_rd(g, mem, |
984 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0)); | 984 | ctxsw_prog_main_image_context_timestamp_buffer_control_o())); |
985 | 985 | ||
986 | gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n", | 986 | gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n", |
987 | gk20a_mem_rd32(ctx_ptr + | 987 | gk20a_mem_rd(g, mem, |
988 | ctxsw_prog_main_image_num_save_ops_o(), 0)); | 988 | ctxsw_prog_main_image_num_save_ops_o())); |
989 | gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n", | 989 | gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n", |
990 | gk20a_mem_rd32(ctx_ptr + | 990 | gk20a_mem_rd(g, mem, |
991 | ctxsw_prog_main_image_num_wfi_save_ops_o(), 0)); | 991 | ctxsw_prog_main_image_num_wfi_save_ops_o())); |
992 | gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n", | 992 | gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n", |
993 | gk20a_mem_rd32(ctx_ptr + | 993 | gk20a_mem_rd(g, mem, |
994 | ctxsw_prog_main_image_num_cta_save_ops_o(), 0)); | 994 | ctxsw_prog_main_image_num_cta_save_ops_o())); |
995 | gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n", | 995 | gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n", |
996 | gk20a_mem_rd32(ctx_ptr + | 996 | gk20a_mem_rd(g, mem, |
997 | ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0)); | 997 | ctxsw_prog_main_image_num_gfxp_save_ops_o())); |
998 | gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n", | 998 | gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n", |
999 | gk20a_mem_rd32(ctx_ptr + | 999 | gk20a_mem_rd(g, mem, |
1000 | ctxsw_prog_main_image_num_cilp_save_ops_o(), 0)); | 1000 | ctxsw_prog_main_image_num_cilp_save_ops_o())); |
1001 | gk20a_err(dev_from_gk20a(g), | 1001 | gk20a_err(dev_from_gk20a(g), |
1002 | "image gfx preemption option (GFXP is 1) %x\n", | 1002 | "image gfx preemption option (GFXP is 1) %x\n", |
1003 | gk20a_mem_rd32(ctx_ptr + | 1003 | gk20a_mem_rd(g, mem, |
1004 | ctxsw_prog_main_image_graphics_preemption_options_o(), | 1004 | ctxsw_prog_main_image_graphics_preemption_options_o())); |
1005 | 0)); | 1005 | gk20a_mem_end(g, mem); |
1006 | vunmap(ctx_ptr); | ||
1007 | } | 1006 | } |
1008 | 1007 | ||
1009 | static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | 1008 | static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, |
@@ -1028,7 +1027,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | |||
1028 | 1027 | ||
1029 | static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | 1028 | static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, |
1030 | struct channel_ctx_gk20a *ch_ctx, | 1029 | struct channel_ctx_gk20a *ch_ctx, |
1031 | void *ctx_ptr) | 1030 | struct mem_desc *mem) |
1032 | { | 1031 | { |
1033 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | 1032 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; |
1034 | u32 gfxp_preempt_option = | 1033 | u32 gfxp_preempt_option = |
@@ -1043,19 +1042,22 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1043 | 1042 | ||
1044 | if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { | 1043 | if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) { |
1045 | gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); | 1044 | gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); |
1046 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0, | 1045 | gk20a_mem_wr(g, mem, |
1046 | ctxsw_prog_main_image_graphics_preemption_options_o(), | ||
1047 | gfxp_preempt_option); | 1047 | gfxp_preempt_option); |
1048 | } | 1048 | } |
1049 | 1049 | ||
1050 | if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) { | 1050 | if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) { |
1051 | gk20a_dbg_info("CILP: %x", cilp_preempt_option); | 1051 | gk20a_dbg_info("CILP: %x", cilp_preempt_option); |
1052 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0, | 1052 | gk20a_mem_wr(g, mem, |
1053 | ctxsw_prog_main_image_compute_preemption_options_o(), | ||
1053 | cilp_preempt_option); | 1054 | cilp_preempt_option); |
1054 | } | 1055 | } |
1055 | 1056 | ||
1056 | if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) { | 1057 | if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) { |
1057 | gk20a_dbg_info("CTA: %x", cta_preempt_option); | 1058 | gk20a_dbg_info("CTA: %x", cta_preempt_option); |
1058 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0, | 1059 | gk20a_mem_wr(g, mem, |
1060 | ctxsw_prog_main_image_compute_preemption_options_o(), | ||
1059 | cta_preempt_option); | 1061 | cta_preempt_option); |
1060 | } | 1062 | } |
1061 | 1063 | ||
@@ -1064,7 +1066,8 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1064 | u32 size; | 1066 | u32 size; |
1065 | u32 cbes_reserve; | 1067 | u32 cbes_reserve; |
1066 | 1068 | ||
1067 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0, | 1069 | gk20a_mem_wr(g, mem, |
1070 | ctxsw_prog_main_image_full_preemption_ptr_o(), | ||
1068 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); | 1071 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); |
1069 | 1072 | ||
1070 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 1073 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
@@ -1931,7 +1934,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
1931 | struct gk20a *g = ch->g; | 1934 | struct gk20a *g = ch->g; |
1932 | struct tsg_gk20a *tsg; | 1935 | struct tsg_gk20a *tsg; |
1933 | struct vm_gk20a *vm; | 1936 | struct vm_gk20a *vm; |
1934 | void *ctx_ptr; | 1937 | struct mem_desc *mem = &gr_ctx->mem; |
1935 | u32 class; | 1938 | u32 class; |
1936 | int err = 0; | 1939 | int err = 0; |
1937 | 1940 | ||
@@ -1955,10 +1958,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
1955 | if (err) | 1958 | if (err) |
1956 | return err; | 1959 | return err; |
1957 | 1960 | ||
1958 | ctx_ptr = vmap(gr_ctx->mem.pages, | 1961 | if (gk20a_mem_begin(g, mem)) |
1959 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1960 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1961 | if (!ctx_ptr) | ||
1962 | return -ENOMEM; | 1962 | return -ENOMEM; |
1963 | 1963 | ||
1964 | g->ops.fifo.disable_channel(ch); | 1964 | g->ops.fifo.disable_channel(ch); |
@@ -1967,14 +1967,14 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
1967 | goto unmap_ctx; | 1967 | goto unmap_ctx; |
1968 | 1968 | ||
1969 | if (g->ops.gr.update_ctxsw_preemption_mode) { | 1969 | if (g->ops.gr.update_ctxsw_preemption_mode) { |
1970 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, ctx_ptr); | 1970 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem); |
1971 | g->ops.gr.commit_global_cb_manager(g, ch, true); | 1971 | g->ops.gr.commit_global_cb_manager(g, ch, true); |
1972 | } | 1972 | } |
1973 | 1973 | ||
1974 | g->ops.fifo.enable_channel(ch); | 1974 | g->ops.fifo.enable_channel(ch); |
1975 | 1975 | ||
1976 | unmap_ctx: | 1976 | unmap_ctx: |
1977 | vunmap(ctx_ptr); | 1977 | gk20a_mem_end(g, mem); |
1978 | 1978 | ||
1979 | return err; | 1979 | return err; |
1980 | } | 1980 | } |
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 0e1f3c4b..0b693f7c 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -145,9 +145,14 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl, | |||
145 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); | 145 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); |
146 | } | 146 | } |
147 | 147 | ||
148 | static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i) | 148 | static u32 pde3_from_index(u32 i) |
149 | { | 149 | { |
150 | return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_pde__size_v()); | 150 | return i * gmmu_new_pde__size_v() / sizeof(u32); |
151 | } | ||
152 | |||
153 | static u32 pte3_from_index(u32 i) | ||
154 | { | ||
155 | return i * gmmu_new_pte__size_v() / sizeof(u32); | ||
151 | } | 156 | } |
152 | 157 | ||
153 | static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry) | 158 | static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry) |
@@ -176,7 +181,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm, | |||
176 | u64 pde_addr = 0; | 181 | u64 pde_addr = 0; |
177 | struct gk20a_mm_entry *pte = parent->entries + i; | 182 | struct gk20a_mm_entry *pte = parent->entries + i; |
178 | u32 pde_v[2] = {0, 0}; | 183 | u32 pde_v[2] = {0, 0}; |
179 | u32 *pde; | 184 | u32 pde; |
180 | 185 | ||
181 | gk20a_dbg_fn(""); | 186 | gk20a_dbg_fn(""); |
182 | 187 | ||
@@ -189,10 +194,10 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm, | |||
189 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr)); | 194 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr)); |
190 | pde_v[0] |= gmmu_new_pde_vol_true_f(); | 195 | pde_v[0] |= gmmu_new_pde_vol_true_f(); |
191 | pde_v[1] |= pte_addr >> 24; | 196 | pde_v[1] |= pte_addr >> 24; |
192 | pde = pde3_from_index(parent, i); | 197 | pde = pde3_from_index(i); |
193 | 198 | ||
194 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 199 | gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]); |
195 | gk20a_mem_wr32(pde, 1, pde_v[1]); | 200 | gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]); |
196 | 201 | ||
197 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", | 202 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", |
198 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); | 203 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); |
@@ -200,9 +205,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm, | |||
200 | return 0; | 205 | return 0; |
201 | } | 206 | } |
202 | 207 | ||
203 | static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i) | 208 | static u32 pde0_from_index(u32 i) |
204 | { | 209 | { |
205 | return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_dual_pde__size_v()); | 210 | return i * gmmu_new_dual_pde__size_v() / sizeof(u32); |
206 | } | 211 | } |
207 | 212 | ||
208 | static int update_gmmu_pde0_locked(struct vm_gk20a *vm, | 213 | static int update_gmmu_pde0_locked(struct vm_gk20a *vm, |
@@ -220,7 +225,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm, | |||
220 | u32 pte_addr_small = 0, pte_addr_big = 0; | 225 | u32 pte_addr_small = 0, pte_addr_big = 0; |
221 | struct gk20a_mm_entry *entry = pte->entries + i; | 226 | struct gk20a_mm_entry *entry = pte->entries + i; |
222 | u32 pde_v[4] = {0, 0, 0, 0}; | 227 | u32 pde_v[4] = {0, 0, 0, 0}; |
223 | u32 *pde; | 228 | u32 pde; |
224 | 229 | ||
225 | gk20a_dbg_fn(""); | 230 | gk20a_dbg_fn(""); |
226 | 231 | ||
@@ -254,12 +259,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm, | |||
254 | pde_v[1] |= pte_addr_big >> 28; | 259 | pde_v[1] |= pte_addr_big >> 28; |
255 | } | 260 | } |
256 | 261 | ||
257 | pde = pde0_from_index(pte, i); | 262 | pde = pde0_from_index(i); |
258 | 263 | ||
259 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 264 | gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]); |
260 | gk20a_mem_wr32(pde, 1, pde_v[1]); | 265 | gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]); |
261 | gk20a_mem_wr32(pde, 2, pde_v[2]); | 266 | gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]); |
262 | gk20a_mem_wr32(pde, 3, pde_v[3]); | 267 | gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]); |
263 | 268 | ||
264 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]", | 269 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]", |
265 | i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]); | 270 | i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]); |
@@ -323,8 +328,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
323 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | 328 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); |
324 | } | 329 | } |
325 | 330 | ||
326 | gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]); | 331 | gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]); |
327 | gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]); | 332 | gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]); |
328 | 333 | ||
329 | if (*iova) { | 334 | if (*iova) { |
330 | *iova += page_size; | 335 | *iova += page_size; |
@@ -376,12 +381,13 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, | |||
376 | return gp10b_mm_levels; | 381 | return gp10b_mm_levels; |
377 | } | 382 | } |
378 | 383 | ||
379 | static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | 384 | static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, |
385 | u64 pdb_addr) | ||
380 | { | 386 | { |
381 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 387 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
382 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | 388 | u32 pdb_addr_hi = u64_hi32(pdb_addr); |
383 | 389 | ||
384 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | 390 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), |
385 | (g->mm.vidmem_is_vidmem ? | 391 | (g->mm.vidmem_is_vidmem ? |
386 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : | 392 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : |
387 | ram_in_page_dir_base_target_vid_mem_f()) | | 393 | ram_in_page_dir_base_target_vid_mem_f()) | |
@@ -389,7 +395,7 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | |||
389 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | | 395 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | |
390 | 1 << 10); | 396 | 1 << 10); |
391 | 397 | ||
392 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | 398 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), |
393 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 399 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
394 | } | 400 | } |
395 | 401 | ||