author	Nicolin Chen <nicolinc@nvidia.com>	2018-08-13 23:22:56 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-08-22 20:33:42 -0400
commit	52305f0514d29e7fb2cb5e2154188e09faa3fe94 (patch)
tree	f5b50db358366692188e008ee2303dc5135e65ea
parent	d5473e225decc74f0d6bb015d06365dad15828d0 (diff)
gpu: nvgpu: Reduce structure padding waste
The gk20a_init_fifo_setup_sw_common() function allocates memory for the
channel_gk20a and tsg_gk20a structures for all 512 channels:

    Size    Caller                     Module   Pages      Type
    749568  __nvgpu_vzalloc+0x28/0x78  [nvgpu]  pages=182  vmalloc
    602112  __nvgpu_vzalloc+0x28/0x78  [nvgpu]  pages=146  vmalloc

This change simply reorganizes the member definitions in those two
structures to reduce padding waste. After this change:

    Size    Caller                     Module   Pages      Type
    733184  __nvgpu_vzalloc+0x28/0x78  [nvgpu]  pages=178  vmalloc
    585728  __nvgpu_vzalloc+0x28/0x78  [nvgpu]  pages=142  vmalloc

In summary, it saves 8 pages, i.e. 32KB of memory.

Bug 2327574
Bug 2284925

Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1803358
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.h	52
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.h	4
-rw-r--r--	drivers/gpu/nvgpu/gk20a/tsg_gk20a.h	19
3 files changed, 37 insertions, 38 deletions
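Why the reordering saves memory (an illustrative sketch, not part of the patch): the compiler pads each member up to its natural alignment, so interleaving 1-byte bools with 4-byte and 8-byte members scatters padding throughout the structure, while grouping members by size packs them tightly. The structures below are simplified, hypothetical stand-ins that only borrow a few member names from channel_gk20a, assuming a typical LP64 Linux target with 8-byte pointers, 4-byte int and 1-byte bool:

#include <stdbool.h>
#include <stdio.h>

/* Poorly ordered: every narrow member forces alignment padding before
 * the next wider one. */
struct ch_padded {
	bool referenceable;   /* 1 byte + 7 bytes of padding */
	void *hw_sema;        /* 8 bytes                     */
	bool vpr;             /* 1 byte + 3 bytes of padding */
	int chid;             /* 4 bytes                     */
	bool deterministic;   /* 1 byte + 7 bytes of padding */
	void *os_priv;        /* 8 bytes                     */
};

/* Reordered in the spirit of the patch: pointers, then ints, then bools. */
struct ch_packed {
	void *hw_sema;
	void *os_priv;
	int chid;
	bool referenceable;
	bool vpr;
	bool deterministic;   /* only 1 byte of tail padding remains */
};

int main(void)
{
	/* Typically prints "40 vs 24" on x86_64/aarch64 Linux. */
	printf("%zu vs %zu\n",
	       sizeof(struct ch_padded), sizeof(struct ch_packed));
	return 0;
}

Scaled across the 512 preallocated channels, the figures in the commit message work out to roughly 32 bytes saved per channel_gk20a and per tsg_gk20a instance, which is where the 8 vmalloc pages (32KB) come from.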
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 9f737192..7c3d950b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -197,7 +197,6 @@ struct channel_gk20a {
 	struct nvgpu_list_node free_chs;
 
 	struct nvgpu_spinlock ref_obtain_lock;
-	bool referenceable;
 	nvgpu_atomic_t ref_count;
 	struct nvgpu_cond ref_count_dec_wq;
 #if GK20A_CHANNEL_REFCOUNT_TRACKING
@@ -214,19 +213,14 @@ struct channel_gk20a {
 
 	struct nvgpu_semaphore_int *hw_sema;
 
-	int chid;
 	nvgpu_atomic_t bound;
-	bool vpr;
-	bool deterministic;
-	/* deterministic, but explicitly idle and submits disallowed */
-	bool deterministic_railgate_allowed;
-	bool cde;
-	bool usermode_submit_enabled;
+
+	int chid;
+	int tsgid;
 	pid_t pid;
 	pid_t tgid;
 	struct nvgpu_mutex ioctl_lock;
 
-	int tsgid;
 	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
 
 	struct channel_gk20a_joblist joblist;
@@ -242,16 +236,11 @@ struct channel_gk20a {
 	u64 userd_iova;
 	u64 userd_gpu_va;
 
-	u32 obj_class; /* we support only one obj per channel */
-
 	struct priv_cmd_queue priv_cmd_q;
 
 	struct nvgpu_cond notifier_wq;
 	struct nvgpu_cond semaphore_wq;
 
-	u32 timeout_accumulated_ms;
-	u32 timeout_gpfifo_get;
-
 	/* kernel watchdog to kill stuck jobs */
 	struct channel_gk20a_timeout timeout;
 
@@ -271,32 +260,43 @@ struct channel_gk20a {
 	struct nvgpu_mutex dbg_s_lock;
 	struct nvgpu_list_node dbg_s_list;
 
-	bool has_timedout;
-	u32 timeout_ms_max;
-	bool timeout_debug_dump;
-
 	struct nvgpu_mutex sync_lock;
 	struct gk20a_channel_sync *sync;
 	struct gk20a_channel_sync *user_sync;
 
-	bool has_os_fence_framework_support;
-
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	u64 virt_ctx;
 #endif
 
-	u32 runlist_id;
-
-	bool is_privileged_channel;
-	u32 subctx_id;
-	u32 runqueue_sel;
-
 	struct ctx_header_desc ctx_header;
 
 	/* Any operating system specific data. */
 	void *os_priv;
 
+	u32 obj_class; /* we support only one obj per channel */
+
+	u32 timeout_accumulated_ms;
+	u32 timeout_gpfifo_get;
+
+	u32 subctx_id;
+	u32 runqueue_sel;
+
+	u32 timeout_ms_max;
+	u32 runlist_id;
+
 	bool mmu_nack_handled;
+	bool has_timedout;
+	bool referenceable;
+	bool vpr;
+	bool deterministic;
+	/* deterministic, but explicitly idle and submits disallowed */
+	bool deterministic_railgate_allowed;
+	bool cde;
+	bool usermode_submit_enabled;
+	bool timeout_debug_dump;
+	bool has_os_fence_framework_support;
+
+	bool is_privileged_channel;
 };
 
 static inline struct channel_gk20a *
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index a60f6f12..3fc7e55f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -453,7 +453,6 @@ struct nvgpu_gr_ctx {
 
 	u32 graphics_preempt_mode;
 	u32 compute_preempt_mode;
-	bool boosted_ctx;
 
 	struct nvgpu_mem preempt_ctxsw_buffer;
 	struct nvgpu_mem spill_ctxsw_buffer;
@@ -462,11 +461,12 @@ struct nvgpu_gr_ctx {
 	u32 ctx_id;
 	bool ctx_id_valid;
 	bool cilp_preempt_pending;
+	bool boosted_ctx;
+	bool golden_img_loaded;
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	u64 virt_ctx;
 #endif
-	bool golden_img_loaded;
 
 	struct patch_desc patch_ctx;
 	struct zcull_ctx_desc zcull_ctx;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index 2f76477f..552c3bb3 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -42,34 +42,33 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch);
 struct tsg_gk20a {
 	struct gk20a *g;
 
-	bool in_use;
-	int tsgid;
+	struct vm_gk20a *vm;
+	struct nvgpu_mem *eng_method_buffers;
+
 
+	struct nvgpu_gr_ctx gr_ctx;
 	struct nvgpu_ref refcount;
 
 	struct nvgpu_list_node ch_list;
-	int num_active_channels;
+	struct nvgpu_list_node event_id_list;
 	struct nvgpu_rwsem ch_list_lock;
+	struct nvgpu_mutex event_id_list_lock;
+	int num_active_channels;
 
 	unsigned int timeslice_us;
 	unsigned int timeslice_timeout;
 	unsigned int timeslice_scale;
 
-	struct vm_gk20a *vm;
-
 	u32 interleave_level;
-
-	struct nvgpu_list_node event_id_list;
-	struct nvgpu_mutex event_id_list_lock;
+	int tsgid;
 
 	u32 runlist_id;
 	pid_t tgid;
-	struct nvgpu_mem *eng_method_buffers;
 	u32 num_active_tpcs;
 	u8 tpc_pg_enabled;
 	bool tpc_num_initialized;
+	bool in_use;
 
-	struct nvgpu_gr_ctx gr_ctx;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);