diff options
author | Nicolin Chen <nicolinc@nvidia.com> | 2018-08-13 23:22:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-22 20:33:42 -0400 |
commit | 52305f0514d29e7fb2cb5e2154188e09faa3fe94 (patch) | |
tree | f5b50db358366692188e008ee2303dc5135e65ea | |
parent | d5473e225decc74f0d6bb015d06365dad15828d0 (diff) |
gpu: nvgpu: Reduce structure padding waste
The gk20a_init_fifo_setup_sw_common() function allocates memory of
channel_gk20a and tsg_gk20a structures for all 512 channels:
Size Caller Module Pages Type
749568 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=182 vmalloc
602112 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=146 vmalloc
This change simply reorganizes the member definitions in those two
structures to reduce padding waste. After this change:
Size Caller Module Pages Type
733184 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=178 vmalloc
585728 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=142 vmalloc
In summary, it saves 8 pages, i.e., 32KB of memory.
Bug 2327574
Bug 2284925
Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1803358
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 52 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 19 |
3 files changed, 37 insertions, 38 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 9f737192..7c3d950b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -197,7 +197,6 @@ struct channel_gk20a { | |||
197 | struct nvgpu_list_node free_chs; | 197 | struct nvgpu_list_node free_chs; |
198 | 198 | ||
199 | struct nvgpu_spinlock ref_obtain_lock; | 199 | struct nvgpu_spinlock ref_obtain_lock; |
200 | bool referenceable; | ||
201 | nvgpu_atomic_t ref_count; | 200 | nvgpu_atomic_t ref_count; |
202 | struct nvgpu_cond ref_count_dec_wq; | 201 | struct nvgpu_cond ref_count_dec_wq; |
203 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | 202 | #if GK20A_CHANNEL_REFCOUNT_TRACKING |
@@ -214,19 +213,14 @@ struct channel_gk20a { | |||
214 | 213 | ||
215 | struct nvgpu_semaphore_int *hw_sema; | 214 | struct nvgpu_semaphore_int *hw_sema; |
216 | 215 | ||
217 | int chid; | ||
218 | nvgpu_atomic_t bound; | 216 | nvgpu_atomic_t bound; |
219 | bool vpr; | 217 | |
220 | bool deterministic; | 218 | int chid; |
221 | /* deterministic, but explicitly idle and submits disallowed */ | 219 | int tsgid; |
222 | bool deterministic_railgate_allowed; | ||
223 | bool cde; | ||
224 | bool usermode_submit_enabled; | ||
225 | pid_t pid; | 220 | pid_t pid; |
226 | pid_t tgid; | 221 | pid_t tgid; |
227 | struct nvgpu_mutex ioctl_lock; | 222 | struct nvgpu_mutex ioctl_lock; |
228 | 223 | ||
229 | int tsgid; | ||
230 | struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ | 224 | struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ |
231 | 225 | ||
232 | struct channel_gk20a_joblist joblist; | 226 | struct channel_gk20a_joblist joblist; |
@@ -242,16 +236,11 @@ struct channel_gk20a { | |||
242 | u64 userd_iova; | 236 | u64 userd_iova; |
243 | u64 userd_gpu_va; | 237 | u64 userd_gpu_va; |
244 | 238 | ||
245 | u32 obj_class; /* we support only one obj per channel */ | ||
246 | |||
247 | struct priv_cmd_queue priv_cmd_q; | 239 | struct priv_cmd_queue priv_cmd_q; |
248 | 240 | ||
249 | struct nvgpu_cond notifier_wq; | 241 | struct nvgpu_cond notifier_wq; |
250 | struct nvgpu_cond semaphore_wq; | 242 | struct nvgpu_cond semaphore_wq; |
251 | 243 | ||
252 | u32 timeout_accumulated_ms; | ||
253 | u32 timeout_gpfifo_get; | ||
254 | |||
255 | /* kernel watchdog to kill stuck jobs */ | 244 | /* kernel watchdog to kill stuck jobs */ |
256 | struct channel_gk20a_timeout timeout; | 245 | struct channel_gk20a_timeout timeout; |
257 | 246 | ||
@@ -271,32 +260,43 @@ struct channel_gk20a { | |||
271 | struct nvgpu_mutex dbg_s_lock; | 260 | struct nvgpu_mutex dbg_s_lock; |
272 | struct nvgpu_list_node dbg_s_list; | 261 | struct nvgpu_list_node dbg_s_list; |
273 | 262 | ||
274 | bool has_timedout; | ||
275 | u32 timeout_ms_max; | ||
276 | bool timeout_debug_dump; | ||
277 | |||
278 | struct nvgpu_mutex sync_lock; | 263 | struct nvgpu_mutex sync_lock; |
279 | struct gk20a_channel_sync *sync; | 264 | struct gk20a_channel_sync *sync; |
280 | struct gk20a_channel_sync *user_sync; | 265 | struct gk20a_channel_sync *user_sync; |
281 | 266 | ||
282 | bool has_os_fence_framework_support; | ||
283 | |||
284 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 267 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
285 | u64 virt_ctx; | 268 | u64 virt_ctx; |
286 | #endif | 269 | #endif |
287 | 270 | ||
288 | u32 runlist_id; | ||
289 | |||
290 | bool is_privileged_channel; | ||
291 | u32 subctx_id; | ||
292 | u32 runqueue_sel; | ||
293 | |||
294 | struct ctx_header_desc ctx_header; | 271 | struct ctx_header_desc ctx_header; |
295 | 272 | ||
296 | /* Any operating system specific data. */ | 273 | /* Any operating system specific data. */ |
297 | void *os_priv; | 274 | void *os_priv; |
298 | 275 | ||
276 | u32 obj_class; /* we support only one obj per channel */ | ||
277 | |||
278 | u32 timeout_accumulated_ms; | ||
279 | u32 timeout_gpfifo_get; | ||
280 | |||
281 | u32 subctx_id; | ||
282 | u32 runqueue_sel; | ||
283 | |||
284 | u32 timeout_ms_max; | ||
285 | u32 runlist_id; | ||
286 | |||
299 | bool mmu_nack_handled; | 287 | bool mmu_nack_handled; |
288 | bool has_timedout; | ||
289 | bool referenceable; | ||
290 | bool vpr; | ||
291 | bool deterministic; | ||
292 | /* deterministic, but explicitly idle and submits disallowed */ | ||
293 | bool deterministic_railgate_allowed; | ||
294 | bool cde; | ||
295 | bool usermode_submit_enabled; | ||
296 | bool timeout_debug_dump; | ||
297 | bool has_os_fence_framework_support; | ||
298 | |||
299 | bool is_privileged_channel; | ||
300 | }; | 300 | }; |
301 | 301 | ||
302 | static inline struct channel_gk20a * | 302 | static inline struct channel_gk20a * |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index a60f6f12..3fc7e55f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -453,7 +453,6 @@ struct nvgpu_gr_ctx { | |||
453 | 453 | ||
454 | u32 graphics_preempt_mode; | 454 | u32 graphics_preempt_mode; |
455 | u32 compute_preempt_mode; | 455 | u32 compute_preempt_mode; |
456 | bool boosted_ctx; | ||
457 | 456 | ||
458 | struct nvgpu_mem preempt_ctxsw_buffer; | 457 | struct nvgpu_mem preempt_ctxsw_buffer; |
459 | struct nvgpu_mem spill_ctxsw_buffer; | 458 | struct nvgpu_mem spill_ctxsw_buffer; |
@@ -462,11 +461,12 @@ struct nvgpu_gr_ctx { | |||
462 | u32 ctx_id; | 461 | u32 ctx_id; |
463 | bool ctx_id_valid; | 462 | bool ctx_id_valid; |
464 | bool cilp_preempt_pending; | 463 | bool cilp_preempt_pending; |
464 | bool boosted_ctx; | ||
465 | bool golden_img_loaded; | ||
465 | 466 | ||
466 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 467 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
467 | u64 virt_ctx; | 468 | u64 virt_ctx; |
468 | #endif | 469 | #endif |
469 | bool golden_img_loaded; | ||
470 | 470 | ||
471 | struct patch_desc patch_ctx; | 471 | struct patch_desc patch_ctx; |
472 | struct zcull_ctx_desc zcull_ctx; | 472 | struct zcull_ctx_desc zcull_ctx; |
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 2f76477f..552c3bb3 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | |||
@@ -42,34 +42,33 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch); | |||
42 | struct tsg_gk20a { | 42 | struct tsg_gk20a { |
43 | struct gk20a *g; | 43 | struct gk20a *g; |
44 | 44 | ||
45 | bool in_use; | 45 | struct vm_gk20a *vm; |
46 | int tsgid; | 46 | struct nvgpu_mem *eng_method_buffers; |
47 | |||
47 | 48 | ||
49 | struct nvgpu_gr_ctx gr_ctx; | ||
48 | struct nvgpu_ref refcount; | 50 | struct nvgpu_ref refcount; |
49 | 51 | ||
50 | struct nvgpu_list_node ch_list; | 52 | struct nvgpu_list_node ch_list; |
51 | int num_active_channels; | 53 | struct nvgpu_list_node event_id_list; |
52 | struct nvgpu_rwsem ch_list_lock; | 54 | struct nvgpu_rwsem ch_list_lock; |
55 | struct nvgpu_mutex event_id_list_lock; | ||
56 | int num_active_channels; | ||
53 | 57 | ||
54 | unsigned int timeslice_us; | 58 | unsigned int timeslice_us; |
55 | unsigned int timeslice_timeout; | 59 | unsigned int timeslice_timeout; |
56 | unsigned int timeslice_scale; | 60 | unsigned int timeslice_scale; |
57 | 61 | ||
58 | struct vm_gk20a *vm; | ||
59 | |||
60 | u32 interleave_level; | 62 | u32 interleave_level; |
61 | 63 | int tsgid; | |
62 | struct nvgpu_list_node event_id_list; | ||
63 | struct nvgpu_mutex event_id_list_lock; | ||
64 | 64 | ||
65 | u32 runlist_id; | 65 | u32 runlist_id; |
66 | pid_t tgid; | 66 | pid_t tgid; |
67 | struct nvgpu_mem *eng_method_buffers; | ||
68 | u32 num_active_tpcs; | 67 | u32 num_active_tpcs; |
69 | u8 tpc_pg_enabled; | 68 | u8 tpc_pg_enabled; |
70 | bool tpc_num_initialized; | 69 | bool tpc_num_initialized; |
70 | bool in_use; | ||
71 | 71 | ||
72 | struct nvgpu_gr_ctx gr_ctx; | ||
73 | }; | 72 | }; |
74 | 73 | ||
75 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); | 74 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); |