Diffstat (limited to 'drivers')
-rw-r--r--   drivers/gpu/nvgpu/gp10b/fifo_gp10b.c   61
-rw-r--r--   drivers/gpu/nvgpu/gp10b/gr_gp10b.c     78
-rw-r--r--   drivers/gpu/nvgpu/gp10b/mm_gp10b.c     44
3 files changed, 91 insertions, 92 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 9cb26d3f..4766e0e4 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -25,24 +25,24 @@
 #include "hw_ram_gp10b.h"
 
 static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g,
-                void *inst_ptr)
+                struct mem_desc *mem)
 {
         u32 val;
 
         gk20a_dbg_fn("");
 
-        val = gk20a_mem_rd32(inst_ptr,
+        val = gk20a_mem_rd32(g, mem,
                         ram_in_page_dir_base_fault_replay_tex_w());
         val &= ~ram_in_page_dir_base_fault_replay_tex_m();
         val |= ram_in_page_dir_base_fault_replay_tex_true_f();
-        gk20a_mem_wr32(inst_ptr,
+        gk20a_mem_wr32(g, mem,
                         ram_in_page_dir_base_fault_replay_tex_w(), val);
 
-        val = gk20a_mem_rd32(inst_ptr,
+        val = gk20a_mem_rd32(g, mem,
                         ram_in_page_dir_base_fault_replay_gcc_w());
         val &= ~ram_in_page_dir_base_fault_replay_gcc_m();
         val |= ram_in_page_dir_base_fault_replay_gcc_true_f();
-        gk20a_mem_wr32(inst_ptr,
+        gk20a_mem_wr32(g, mem,
                         ram_in_page_dir_base_fault_replay_gcc_w(), val);
 
         gk20a_dbg_fn("done");
@@ -52,28 +52,25 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 {
         u32 addr_lo;
         u32 addr_hi;
-        void *inst_ptr;
         struct gk20a *g = c->g;
 
         gk20a_dbg_fn("");
 
-        inst_ptr = c->inst_block.cpu_va;
-        if (!inst_ptr)
-                return -ENOMEM;
-
         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
         addr_hi = u64_hi32(c->userd_iova);
 
         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
                 c->hw_chid, (u64)c->userd_iova);
 
-        gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
+        gk20a_mem_wr32(g, &c->inst_block,
+                ram_in_ramfc_w() + ram_fc_userd_w(),
                 (g->mm.vidmem_is_vidmem ?
                  pbdma_userd_target_sys_mem_ncoh_f() :
                  pbdma_userd_target_vid_mem_f()) |
                 pbdma_userd_addr_f(addr_lo));
 
-        gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
+        gk20a_mem_wr32(g, &c->inst_block,
+                ram_in_ramfc_w() + ram_fc_userd_hi_w(),
                 pbdma_userd_hi_addr_f(addr_hi));
 
         return 0;
@@ -82,33 +79,30 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
                 u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
 {
-        void *inst_ptr;
+        struct gk20a *g = c->g;
+        struct mem_desc *mem = &c->inst_block;
 
         gk20a_dbg_fn("");
 
-        inst_ptr = c->inst_block.cpu_va;
-        if (!inst_ptr)
-                return -ENOMEM;
+        gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
 
-        memset(inst_ptr, 0, ram_fc_size_val_v());
-
-        gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
+        gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
                 pbdma_gp_base_offset_f(
                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
+        gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
+        gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
                 c->g->ops.fifo.get_pbdma_signature(c->g));
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
+        gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
                 pbdma_formats_gp_fermi0_f() |
                 pbdma_formats_pb_fermi1_f() |
                 pbdma_formats_mp_fermi0_f());
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
+        gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
                 pbdma_pb_header_priv_user_f() |
                 pbdma_pb_header_method_zero_f() |
                 pbdma_pb_header_subchannel_zero_f() |
@@ -116,26 +110,26 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
                 pbdma_pb_header_first_true_f() |
                 pbdma_pb_header_type_inc_f());
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
+        gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
                 pbdma_subdevice_id_f(1) |
                 pbdma_subdevice_status_active_f() |
                 pbdma_subdevice_channel_dma_enable_f());
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
+        gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
+        gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
                 channel_gk20a_pbdma_acquire_val(c));
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
+        gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
                 pbdma_runlist_timeslice_timeout_128_f() |
                 pbdma_runlist_timeslice_timescale_3_f() |
                 pbdma_runlist_timeslice_enable_true_f());
 
         if ( flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE)
-                gp10b_set_pdb_fault_replay_flags(c->g, inst_ptr);
+                gp10b_set_pdb_fault_replay_flags(c->g, mem);
 
 
-        gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
+        gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
         return channel_gp10b_commit_userd(c);
 }
@@ -149,14 +143,12 @@ static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
 static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 {
         u32 new_syncpt = 0, old_syncpt;
-        void *inst_ptr;
         u32 v;
 
         gk20a_dbg_fn("");
 
-        inst_ptr = c->inst_block.cpu_va;
-
-        v = gk20a_mem_rd32(inst_ptr, ram_fc_allowed_syncpoints_w());
+        v = gk20a_mem_rd32(c->g, &c->inst_block,
+                        ram_fc_allowed_syncpoints_w());
         old_syncpt = pbdma_allowed_syncpoints_0_index_v(v);
         if (c->sync)
                 new_syncpt = c->sync->syncpt_id(c->sync);
@@ -175,7 +167,8 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 
                 v |= pbdma_allowed_syncpoints_0_index_f(new_syncpt);
 
-                gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), v);
+                gk20a_mem_wr32(c->g, &c->inst_block,
+                                ram_fc_allowed_syncpoints_w(), v);
         }
 
         /* enable channel */
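
The fifo changes above all follow the same conversion: instead of dereferencing c->inst_block.cpu_va directly (and bailing out with -ENOMEM when it is NULL), the code passes the device and the mem_desc to the gk20a_mem_* accessors, which resolve the backing memory themselves. A minimal sketch of the new call shape, using only helpers and offsets that appear in the hunks above (the wrapper function itself is hypothetical):

        /*
         * Illustrative only (not part of the patch): the call-shape change for
         * instance-block accesses. Offsets are 32-bit word offsets from the
         * hw_*_gp10b.h headers; gk20a_mem_wr32() takes the device and a
         * struct mem_desc instead of a raw CPU pointer, so the caller no
         * longer needs inst_block.cpu_va to be mapped.
         */
        static void commit_chid_example(struct gk20a *g, struct channel_gk20a *c)
        {
                /* before: gk20a_mem_wr32(c->inst_block.cpu_va, ram_fc_chid_w(), ...); */

                /* after: the helper resolves sysmem vs. vidmem behind the mem_desc */
                gk20a_mem_wr32(g, &c->inst_block, ram_fc_chid_w(),
                                ram_fc_chid_id_f(c->hw_chid));
        }
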
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index b36eff8f..07f1014f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -958,52 +958,51 @@ fail_free_gk20a_ctx:
 }
 
 static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
-                struct gr_ctx_desc *gr_ctx) {
-        void *ctx_ptr = vmap(gr_ctx->mem.pages,
-                        PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT,
-                        0, pgprot_writecombine(PAGE_KERNEL));
-        if (!ctx_ptr) {
+                struct gr_ctx_desc *gr_ctx)
+{
+        struct mem_desc *mem = &gr_ctx->mem;
+
+        if (gk20a_mem_begin(g, mem)) {
                 WARN_ON("Cannot map context");
                 return;
         }
         gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_magic_value_o(), 0),
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_magic_value_o()),
                 ctxsw_prog_main_image_magic_value_v_value_v());
 
         gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0));
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
 
         gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0));
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
 
         gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0));
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
 
         gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_num_save_ops_o(), 0));
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_num_save_ops_o()));
         gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_num_wfi_save_ops_o(), 0));
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_num_wfi_save_ops_o()));
         gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_num_cta_save_ops_o(), 0));
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_num_cta_save_ops_o()));
         gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0));
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_num_gfxp_save_ops_o()));
         gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_num_cilp_save_ops_o(), 0));
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_num_cilp_save_ops_o()));
         gk20a_err(dev_from_gk20a(g),
                 "image gfx preemption option (GFXP is 1) %x\n",
-                gk20a_mem_rd32(ctx_ptr +
-                        ctxsw_prog_main_image_graphics_preemption_options_o(),
-                        0));
-        vunmap(ctx_ptr);
+                gk20a_mem_rd(g, mem,
+                        ctxsw_prog_main_image_graphics_preemption_options_o()));
+        gk20a_mem_end(g, mem);
 }
 
 static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
@@ -1028,7 +1027,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 
 static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
                 struct channel_ctx_gk20a *ch_ctx,
-                void *ctx_ptr)
+                struct mem_desc *mem)
 {
         struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
         u32 gfxp_preempt_option =
@@ -1043,19 +1042,22 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 
         if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
                 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
-                gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
+                gk20a_mem_wr(g, mem,
+                        ctxsw_prog_main_image_graphics_preemption_options_o(),
                         gfxp_preempt_option);
         }
 
         if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
                 gk20a_dbg_info("CILP: %x", cilp_preempt_option);
-                gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+                gk20a_mem_wr(g, mem,
+                        ctxsw_prog_main_image_compute_preemption_options_o(),
                         cilp_preempt_option);
         }
 
         if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
                 gk20a_dbg_info("CTA: %x", cta_preempt_option);
-                gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+                gk20a_mem_wr(g, mem,
+                        ctxsw_prog_main_image_compute_preemption_options_o(),
                         cta_preempt_option);
         }
 
@@ -1064,7 +1066,8 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
                 u32 size;
                 u32 cbes_reserve;
 
-                gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0,
+                gk20a_mem_wr(g, mem,
+                        ctxsw_prog_main_image_full_preemption_ptr_o(),
                         gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
 
                 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
@@ -1931,7 +1934,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
         struct gk20a *g = ch->g;
         struct tsg_gk20a *tsg;
         struct vm_gk20a *vm;
-        void *ctx_ptr;
+        struct mem_desc *mem = &gr_ctx->mem;
         u32 class;
         int err = 0;
 
@@ -1955,10 +1958,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
         if (err)
                 return err;
 
-        ctx_ptr = vmap(gr_ctx->mem.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-                        0, pgprot_writecombine(PAGE_KERNEL));
-        if (!ctx_ptr)
+        if (gk20a_mem_begin(g, mem))
                 return -ENOMEM;
 
         g->ops.fifo.disable_channel(ch);
@@ -1967,14 +1967,14 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
                 goto unmap_ctx;
 
         if (g->ops.gr.update_ctxsw_preemption_mode) {
-                g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, ctx_ptr);
+                g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
                 g->ops.gr.commit_global_cb_manager(g, ch, true);
         }
 
         g->ops.fifo.enable_channel(ch);
 
 unmap_ctx:
-        vunmap(ctx_ptr);
+        gk20a_mem_end(g, mem);
 
         return err;
 }
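
In gr_gp10b.c the vmap()/vunmap() of the graphics context buffer is replaced by a gk20a_mem_begin()/gk20a_mem_end() bracket around the accessor calls. A short sketch of that pattern, assuming (as the error paths above imply) that gk20a_mem_begin() returns non-zero on failure; the wrapper function is illustrative only:

        /*
         * Illustrative only: the begin/end bracket that replaces vmap()/vunmap()
         * on the gr context buffer. gk20a_mem_begin() is assumed to return
         * non-zero on failure, matching the error handling in the hunks above.
         */
        static int read_ctx_magic_example(struct gk20a *g, struct gr_ctx_desc *gr_ctx,
                        u32 *magic)
        {
                struct mem_desc *mem = &gr_ctx->mem;

                if (gk20a_mem_begin(g, mem))    /* map the buffer for CPU access */
                        return -ENOMEM;

                *magic = gk20a_mem_rd(g, mem,
                                ctxsw_prog_main_image_magic_value_o());

                gk20a_mem_end(g, mem);          /* always pair with the begin */
                return 0;
        }
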
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0e1f3c4b..0b693f7c 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -145,9 +145,14 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
         return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde3_from_index(u32 i)
 {
-        return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_pde__size_v());
+        return i * gmmu_new_pde__size_v() / sizeof(u32);
+}
+
+static u32 pte3_from_index(u32 i)
+{
+        return i * gmmu_new_pte__size_v() / sizeof(u32);
 }
 
 static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
@@ -176,7 +181,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
         u64 pde_addr = 0;
         struct gk20a_mm_entry *pte = parent->entries + i;
         u32 pde_v[2] = {0, 0};
-        u32 *pde;
+        u32 pde;
 
         gk20a_dbg_fn("");
 
@@ -189,10 +194,10 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
         pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
         pde_v[0] |= gmmu_new_pde_vol_true_f();
         pde_v[1] |= pte_addr >> 24;
-        pde = pde3_from_index(parent, i);
+        pde = pde3_from_index(i);
 
-        gk20a_mem_wr32(pde, 0, pde_v[0]);
-        gk20a_mem_wr32(pde, 1, pde_v[1]);
+        gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]);
+        gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]);
 
         gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
                   i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -200,9 +205,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
         return 0;
 }
 
-static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde0_from_index(u32 i)
 {
-        return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_dual_pde__size_v());
+        return i * gmmu_new_dual_pde__size_v() / sizeof(u32);
 }
 
 static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
@@ -220,7 +225,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
         u32 pte_addr_small = 0, pte_addr_big = 0;
         struct gk20a_mm_entry *entry = pte->entries + i;
         u32 pde_v[4] = {0, 0, 0, 0};
-        u32 *pde;
+        u32 pde;
 
         gk20a_dbg_fn("");
 
@@ -254,12 +259,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
                 pde_v[1] |= pte_addr_big >> 28;
         }
 
-        pde = pde0_from_index(pte, i);
+        pde = pde0_from_index(i);
 
-        gk20a_mem_wr32(pde, 0, pde_v[0]);
-        gk20a_mem_wr32(pde, 1, pde_v[1]);
-        gk20a_mem_wr32(pde, 2, pde_v[2]);
-        gk20a_mem_wr32(pde, 3, pde_v[3]);
+        gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]);
+        gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]);
+        gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]);
+        gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]);
 
         gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
                   i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
@@ -323,8 +328,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
                 gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
         }
 
-        gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]);
-        gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]);
+        gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]);
+        gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]);
 
         if (*iova) {
                 *iova += page_size;
@@ -376,12 +381,13 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
         return gp10b_mm_levels;
 }
 
-static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
+                u64 pdb_addr)
 {
         u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
         u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-        gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+        gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
                 (g->mm.vidmem_is_vidmem ?
                  ram_in_page_dir_base_target_sys_mem_ncoh_f() :
                  ram_in_page_dir_base_target_vid_mem_f()) |
@@ -389,7 +395,7 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
                 ram_in_page_dir_base_lo_f(pdb_addr_lo) |
                 1 << 10);
 
-        gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+        gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
                 ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
 
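
In mm_gp10b.c the pde3_from_index()/pde0_from_index() helpers no longer return a CPU pointer into entry->mem.cpu_va; they return a 32-bit word index into the directory's mem_desc, and each field of an entry is written at (index + n). A small sketch of the resulting usage, assuming gmmu_new_pde__size_v() is the PDE size in bytes as in the helper above (the wrapper function is illustrative only):

        /*
         * Illustrative only: the index helpers return a 32-bit word offset
         * into the directory's mem_desc rather than a CPU pointer, so each
         * word of entry i is addressed as pde + n. If gmmu_new_pde__size_v()
         * is 8 bytes, entry i starts at word 2 * i.
         */
        static void write_pde3_example(struct gk20a *g, struct gk20a_mm_entry *parent,
                        u32 i, u32 pde_lo, u32 pde_hi)
        {
                u32 pde = pde3_from_index(i);   /* word index, not a pointer */

                gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_lo);   /* word 0 */
                gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_hi);   /* word 1 */
        }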