diff options
author | Nicolin Chen <nicolinc@nvidia.com> | 2018-08-13 23:22:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-22 20:33:42 -0400 |
commit | 52305f0514d29e7fb2cb5e2154188e09faa3fe94 (patch) | |
tree | f5b50db358366692188e008ee2303dc5135e65ea | |
parent | d5473e225decc74f0d6bb015d06365dad15828d0 (diff) |
gpu: nvgpu: Reduce structure padding waste
The gk20a_init_fifo_setup_sw_common() function allocates memory of
channel_gk20a and tsg_gk20a structures for all 512 channels:
Size Caller Module Pages Type
749568 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=182 vmalloc
602112 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=146 vmalloc
This change simply reorganizes the member definitions in those two
structures to reduce padding waste. After this change:
Size Caller Module Pages Type
733184 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=178 vmalloc
585728 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=142 vmalloc
In summary, it saves 8 pages, i.e., 32KB of memory.
Bug 2327574
Bug 2284925
Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1803358
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 52 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 19 |
3 files changed, 37 insertions, 38 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 9f737192..7c3d950b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -197,7 +197,6 @@ struct channel_gk20a { | |||
197 | struct nvgpu_list_node free_chs; | 197 | struct nvgpu_list_node free_chs; |
198 | 198 | ||
199 | struct nvgpu_spinlock ref_obtain_lock; | 199 | struct nvgpu_spinlock ref_obtain_lock; |
200 | bool referenceable; | ||
201 | nvgpu_atomic_t ref_count; | 200 | nvgpu_atomic_t ref_count; |
202 | struct nvgpu_cond ref_count_dec_wq; | 201 | struct nvgpu_cond ref_count_dec_wq; |
203 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | 202 | #if GK20A_CHANNEL_REFCOUNT_TRACKING |
@@ -214,19 +213,14 @@ struct channel_gk20a { | |||
214 | 213 | ||
215 | struct nvgpu_semaphore_int *hw_sema; | 214 | struct nvgpu_semaphore_int *hw_sema; |
216 | 215 | ||
217 | int chid; | ||
218 | nvgpu_atomic_t bound; | 216 | nvgpu_atomic_t bound; |
219 | bool vpr; | 217 | |
220 | bool deterministic; | 218 | int chid; |
221 | /* deterministic, but explicitly idle and submits disallowed */ | 219 | int tsgid; |
222 | bool deterministic_railgate_allowed; | ||
223 | bool cde; | ||
224 | bool usermode_submit_enabled; | ||
225 | pid_t pid; | 220 | pid_t pid; |
226 | pid_t tgid; | 221 | pid_t tgid; |
227 | struct nvgpu_mutex ioctl_lock; | 222 | struct nvgpu_mutex ioctl_lock; |
228 | 223 | ||
229 | int tsgid; | ||
230 | struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ | 224 | struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ |
231 | 225 | ||
232 | struct channel_gk20a_joblist joblist; | 226 | struct channel_gk20a_joblist joblist; |
@@ -242,16 +236,11 @@ struct channel_gk20a { | |||
242 | u64 userd_iova; | 236 | u64 userd_iova; |
243 | u64 userd_gpu_va; | 237 | u64 userd_gpu_va; |
244 | 238 | ||
245 | u32 obj_class; /* we support only one obj per channel */ | ||
246 | |||
247 | struct priv_cmd_queue priv_cmd_q; | 239 | struct priv_cmd_queue priv_cmd_q; |
248 | 240 | ||
249 | struct nvgpu_cond notifier_wq; | 241 | struct nvgpu_cond notifier_wq; |
250 | struct nvgpu_cond semaphore_wq; | 242 | struct nvgpu_cond semaphore_wq; |
251 | 243 | ||
252 | u32 timeout_accumulated_ms; | ||
253 | u32 timeout_gpfifo_get; | ||
254 | |||
255 | /* kernel watchdog to kill stuck jobs */ | 244 | /* kernel watchdog to kill stuck jobs */ |
256 | struct channel_gk20a_timeout timeout; | 245 | struct channel_gk20a_timeout timeout; |
257 | 246 | ||
@@ -271,32 +260,43 @@ struct channel_gk20a { | |||
271 | struct nvgpu_mutex dbg_s_lock; | 260 | struct nvgpu_mutex dbg_s_lock; |
272 | struct nvgpu_list_node dbg_s_list; | 261 | struct nvgpu_list_node dbg_s_list; |
273 | 262 | ||
274 | bool has_timedout; | ||
275 | u32 timeout_ms_max; | ||
276 | bool timeout_debug_dump; | ||
277 | |||
278 | struct nvgpu_mutex sync_lock; | 263 | struct nvgpu_mutex sync_lock; |
279 | struct gk20a_channel_sync *sync; | 264 | struct gk20a_channel_sync *sync; |
280 | struct gk20a_channel_sync *user_sync; | 265 | struct gk20a_channel_sync *user_sync; |
281 | 266 | ||
282 | bool has_os_fence_framework_support; | ||
283 | |||
284 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 267 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
285 | u64 virt_ctx; | 268 | u64 virt_ctx; |
286 | #endif | 269 | #endif |
287 | 270 | ||
288 | u32 runlist_id; | ||
289 | |||
290 | bool is_privileged_channel; | ||
291 | u32 subctx_id; | ||
292 | u32 runqueue_sel; | ||
293 | |||
294 | struct ctx_header_desc ctx_header; | 271 | struct ctx_header_desc ctx_header; |
295 | 272 | ||
296 | /* Any operating system specific data. */ | 273 | /* Any operating system specific data. */ |
297 | void *os_priv; | 274 | void *os_priv; |
298 | 275 | ||
276 | u32 obj_class; /* we support only one obj per channel */ | ||
277 | |||
278 | u32 timeout_accumulated_ms; | ||
279 | u32 timeout_gpfifo_get; | ||
280 | |||
281 | u32 subctx_id; | ||
282 | u32 runqueue_sel; | ||
283 | |||
284 | u32 timeout_ms_max; | ||
285 | u32 runlist_id; | ||
286 | |||
299 | bool mmu_nack_handled; | 287 | bool mmu_nack_handled; |
288 | bool has_timedout; | ||
289 | bool referenceable; | ||
290 | bool vpr; | ||
291 | bool deterministic; | ||
292 | /* deterministic, but explicitly idle and submits disallowed */ | ||
293 | bool deterministic_railgate_allowed; | ||
294 | bool cde; | ||
295 | bool usermode_submit_enabled; | ||
296 | bool timeout_debug_dump; | ||
297 | bool has_os_fence_framework_support; | ||
298 | |||
299 | bool is_privileged_channel; | ||
300 | }; | 300 | }; |
301 | 301 | ||
302 | static inline struct channel_gk20a * | 302 | static inline struct channel_gk20a * |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index a60f6f12..3fc7e55f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -453,7 +453,6 @@ struct nvgpu_gr_ctx { | |||
453 | 453 | ||
454 | u32 graphics_preempt_mode; | 454 | u32 graphics_preempt_mode; |
455 | u32 compute_preempt_mode; | 455 | u32 compute_preempt_mode; |
456 | bool boosted_ctx; | ||
457 | 456 | ||
458 | struct nvgpu_mem preempt_ctxsw_buffer; | 457 | struct nvgpu_mem preempt_ctxsw_buffer; |
459 | struct nvgpu_mem spill_ctxsw_buffer; | 458 | struct nvgpu_mem spill_ctxsw_buffer; |
@@ -462,11 +461,12 @@ struct nvgpu_gr_ctx { | |||
462 | u32 ctx_id; | 461 | u32 ctx_id; |
463 | bool ctx_id_valid; | 462 | bool ctx_id_valid; |
464 | bool cilp_preempt_pending; | 463 | bool cilp_preempt_pending; |
464 | bool boosted_ctx; | ||
465 | bool golden_img_loaded; | ||
465 | 466 | ||
466 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 467 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
467 | u64 virt_ctx; | 468 | u64 virt_ctx; |
468 | #endif | 469 | #endif |
469 | bool golden_img_loaded; | ||
470 | 470 | ||
471 | struct patch_desc patch_ctx; | 471 | struct patch_desc patch_ctx; |
472 | struct zcull_ctx_desc zcull_ctx; | 472 | struct zcull_ctx_desc zcull_ctx; |
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 2f76477f..552c3bb3 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | |||
@@ -42,34 +42,33 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch); | |||
42 | struct tsg_gk20a { | 42 | struct tsg_gk20a { |
43 | struct gk20a *g; | 43 | struct gk20a *g; |
44 | 44 | ||
45 | bool in_use; | 45 | struct vm_gk20a *vm; |
46 | int tsgid; | 46 | struct nvgpu_mem *eng_method_buffers; |
47 | |||
47 | 48 | ||
49 | struct nvgpu_gr_ctx gr_ctx; | ||
48 | struct nvgpu_ref refcount; | 50 | struct nvgpu_ref refcount; |
49 | 51 | ||
50 | struct nvgpu_list_node ch_list; | 52 | struct nvgpu_list_node ch_list; |
51 | int num_active_channels; | 53 | struct nvgpu_list_node event_id_list; |
52 | struct nvgpu_rwsem ch_list_lock; | 54 | struct nvgpu_rwsem ch_list_lock; |
55 | struct nvgpu_mutex event_id_list_lock; | ||
56 | int num_active_channels; | ||
53 | 57 | ||
54 | unsigned int timeslice_us; | 58 | unsigned int timeslice_us; |
55 | unsigned int timeslice_timeout; | 59 | unsigned int timeslice_timeout; |
56 | unsigned int timeslice_scale; | 60 | unsigned int timeslice_scale; |
57 | 61 | ||
58 | struct vm_gk20a *vm; | ||
59 | |||
60 | u32 interleave_level; | 62 | u32 interleave_level; |
61 | 63 | int tsgid; | |
62 | struct nvgpu_list_node event_id_list; | ||
63 | struct nvgpu_mutex event_id_list_lock; | ||
64 | 64 | ||
65 | u32 runlist_id; | 65 | u32 runlist_id; |
66 | pid_t tgid; | 66 | pid_t tgid; |
67 | struct nvgpu_mem *eng_method_buffers; | ||
68 | u32 num_active_tpcs; | 67 | u32 num_active_tpcs; |
69 | u8 tpc_pg_enabled; | 68 | u8 tpc_pg_enabled; |
70 | bool tpc_num_initialized; | 69 | bool tpc_num_initialized; |
70 | bool in_use; | ||
71 | 71 | ||
72 | struct nvgpu_gr_ctx gr_ctx; | ||
73 | }; | 72 | }; |
74 | 73 | ||
75 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); | 74 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); |