gpu: nvgpu: Reduce structure padding waste

The gk20a_init_fifo_setup_sw_common() function allocates memory for the channel_gk20a and tsg_gk20a structures for all 512 channels:

  Size    Caller                              Module   Pages      Type
  749568  __nvgpu_vzalloc+0x28/0x78           [nvgpu]  pages=182  vmalloc
  602112  __nvgpu_vzalloc+0x28/0x78           [nvgpu]  pages=146  vmalloc

This change simply reorganizes the member definitions in those two structures to reduce padding waste. After this change:

  Size    Caller                              Module   Pages      Type
  733184  __nvgpu_vzalloc+0x28/0x78           [nvgpu]  pages=178  vmalloc
  585728  __nvgpu_vzalloc+0x28/0x78           [nvgpu]  pages=142  vmalloc

In summary, it saves 8 pages, i.e. 32KB of memory.

Bug 2327574
Bug 2284925

Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1803358
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Nicolin Chen <nicolinc@nvidia.com> 2018-08-13 23:22:56 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-08-22 20:33:42 -0400
commit: 52305f0514d29e7fb2cb5e2154188e09faa3fe94 (patch)
tree: f5b50db358366692188e008ee2303dc5135e65ea /drivers/gpu/nvgpu/gk20a/channel_gk20a.h
parent: d5473e225decc74f0d6bb015d06365dad15828d0 (diff)
1 files changed, 26 insertions, 26 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 9f737192..7c3d950b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -197,7 +197,6 @@ struct channel_gk20a {
        struct nvgpu_list_node free_chs;
        struct nvgpu_spinlock ref_obtain_lock;
-        bool referenceable;
        nvgpu_atomic_t ref_count;
        struct nvgpu_cond ref_count_dec_wq;
 #if GK20A_CHANNEL_REFCOUNT_TRACKING
@@ -214,19 +213,14 @@ struct channel_gk20a {
        struct nvgpu_semaphore_int *hw_sema;
-        int chid;
        nvgpu_atomic_t bound;
-        bool vpr;
-        bool deterministic;
+        int chid;
-        /* deterministic, but explicitly idle and submits disallowed */
+        int tsgid;
-        bool deterministic_railgate_allowed;
-        bool cde;
-        bool usermode_submit_enabled;
        pid_t pid;
        pid_t tgid;
        struct nvgpu_mutex ioctl_lock;
-        int tsgid;
        struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
        struct channel_gk20a_joblist joblist;
@@ -242,16 +236,11 @@ struct channel_gk20a {
        u64 userd_iova;
        u64 userd_gpu_va;
-        u32 obj_class;  /* we support only one obj per channel */
        struct priv_cmd_queue priv_cmd_q;
        struct nvgpu_cond notifier_wq;
        struct nvgpu_cond semaphore_wq;
-        u32 timeout_accumulated_ms;
-        u32 timeout_gpfifo_get;
        /* kernel watchdog to kill stuck jobs */
        struct channel_gk20a_timeout timeout;
@@ -271,32 +260,43 @@ struct channel_gk20a {
        struct nvgpu_mutex dbg_s_lock;
        struct nvgpu_list_node dbg_s_list;
-        bool has_timedout;
-        u32 timeout_ms_max;
-        bool timeout_debug_dump;
        struct nvgpu_mutex sync_lock;
        struct gk20a_channel_sync *sync;
        struct gk20a_channel_sync *user_sync;
-        bool has_os_fence_framework_support;
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
        u64 virt_ctx;
 #endif
-        u32 runlist_id;
-        bool is_privileged_channel;
-        u32 subctx_id;
-        u32 runqueue_sel;
        struct ctx_header_desc ctx_header;
        /* Any operating system specific data. */
        void *os_priv;
+        u32 obj_class;  /* we support only one obj per channel */
+        u32 timeout_accumulated_ms;
+        u32 timeout_gpfifo_get;
+        u32 subctx_id;
+        u32 runqueue_sel;
+        u32 timeout_ms_max;
+        u32 runlist_id;
        bool mmu_nack_handled;
+        bool has_timedout;
+        bool referenceable;
+        bool vpr;
+        bool deterministic;
+        /* deterministic, but explicitly idle and submits disallowed */
+        bool deterministic_railgate_allowed;
+        bool cde;
+        bool usermode_submit_enabled;
+        bool timeout_debug_dump;
+        bool has_os_fence_framework_support;
+        bool is_privileged_channel;
 };
 static inline struct channel_gk20a *
author	Nicolin Chen <nicolinc@nvidia.com>	2018-08-13 23:22:56 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-08-22 20:33:42 -0400
commit	52305f0514d29e7fb2cb5e2154188e09faa3fe94 (patch)
tree	f5b50db358366692188e008ee2303dc5135e65ea /drivers/gpu/nvgpu/gk20a/channel_gk20a.h
parent	d5473e225decc74f0d6bb015d06365dad15828d0 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 9f737192..7c3d950b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -197,7 +197,6 @@ struct channel_gk20a {
197	struct nvgpu_list_node free_chs;	197	struct nvgpu_list_node free_chs;
198		198
199	struct nvgpu_spinlock ref_obtain_lock;	199	struct nvgpu_spinlock ref_obtain_lock;
200	bool referenceable;
201	nvgpu_atomic_t ref_count;	200	nvgpu_atomic_t ref_count;
202	struct nvgpu_cond ref_count_dec_wq;	201	struct nvgpu_cond ref_count_dec_wq;
203	#if GK20A_CHANNEL_REFCOUNT_TRACKING	202	#if GK20A_CHANNEL_REFCOUNT_TRACKING
@@ -214,19 +213,14 @@ struct channel_gk20a {
214		213
215	struct nvgpu_semaphore_int *hw_sema;	214	struct nvgpu_semaphore_int *hw_sema;
216		215
217	int chid;
218	nvgpu_atomic_t bound;	216	nvgpu_atomic_t bound;
219	bool vpr;	217
220	bool deterministic;	218	int chid;
221	/* deterministic, but explicitly idle and submits disallowed */	219	int tsgid;
222	bool deterministic_railgate_allowed;
223	bool cde;
224	bool usermode_submit_enabled;
225	pid_t pid;	220	pid_t pid;
226	pid_t tgid;	221	pid_t tgid;
227	struct nvgpu_mutex ioctl_lock;	222	struct nvgpu_mutex ioctl_lock;
228		223
229	int tsgid;
230	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */	224	struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
231		225
232	struct channel_gk20a_joblist joblist;	226	struct channel_gk20a_joblist joblist;
@@ -242,16 +236,11 @@ struct channel_gk20a {
242	u64 userd_iova;	236	u64 userd_iova;
243	u64 userd_gpu_va;	237	u64 userd_gpu_va;
244		238
245	u32 obj_class; /* we support only one obj per channel */
246
247	struct priv_cmd_queue priv_cmd_q;	239	struct priv_cmd_queue priv_cmd_q;
248		240
249	struct nvgpu_cond notifier_wq;	241	struct nvgpu_cond notifier_wq;
250	struct nvgpu_cond semaphore_wq;	242	struct nvgpu_cond semaphore_wq;
251		243
252	u32 timeout_accumulated_ms;
253	u32 timeout_gpfifo_get;
254
255	/* kernel watchdog to kill stuck jobs */	244	/* kernel watchdog to kill stuck jobs */
256	struct channel_gk20a_timeout timeout;	245	struct channel_gk20a_timeout timeout;
257		246
@@ -271,32 +260,43 @@ struct channel_gk20a {
271	struct nvgpu_mutex dbg_s_lock;	260	struct nvgpu_mutex dbg_s_lock;
272	struct nvgpu_list_node dbg_s_list;	261	struct nvgpu_list_node dbg_s_list;
273		262
274	bool has_timedout;
275	u32 timeout_ms_max;
276	bool timeout_debug_dump;
277
278	struct nvgpu_mutex sync_lock;	263	struct nvgpu_mutex sync_lock;
279	struct gk20a_channel_sync *sync;	264	struct gk20a_channel_sync *sync;
280	struct gk20a_channel_sync *user_sync;	265	struct gk20a_channel_sync *user_sync;
281		266
282	bool has_os_fence_framework_support;
283
284	#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION	267	#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
285	u64 virt_ctx;	268	u64 virt_ctx;
286	#endif	269	#endif
287		270
288	u32 runlist_id;
289
290	bool is_privileged_channel;
291	u32 subctx_id;
292	u32 runqueue_sel;
293
294	struct ctx_header_desc ctx_header;	271	struct ctx_header_desc ctx_header;
295		272
296	/* Any operating system specific data. */	273	/* Any operating system specific data. */
297	void *os_priv;	274	void *os_priv;
298		275
		276	u32 obj_class; /* we support only one obj per channel */
		277
		278	u32 timeout_accumulated_ms;
		279	u32 timeout_gpfifo_get;
		280
		281	u32 subctx_id;
		282	u32 runqueue_sel;
		283
		284	u32 timeout_ms_max;
		285	u32 runlist_id;
		286
299	bool mmu_nack_handled;	287	bool mmu_nack_handled;
		288	bool has_timedout;
		289	bool referenceable;
		290	bool vpr;
		291	bool deterministic;
		292	/* deterministic, but explicitly idle and submits disallowed */
		293	bool deterministic_railgate_allowed;
		294	bool cde;
		295	bool usermode_submit_enabled;
		296	bool timeout_debug_dump;
		297	bool has_os_fence_framework_support;
		298
		299	bool is_privileged_channel;
300	};	300	};
301		301
302	static inline struct channel_gk20a *	302	static inline struct channel_gk20a *