diff options
author | Nicolin Chen <nicolinc@nvidia.com> | 2018-08-13 23:22:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-22 20:33:42 -0400 |
commit | 52305f0514d29e7fb2cb5e2154188e09faa3fe94 (patch) | |
tree | f5b50db358366692188e008ee2303dc5135e65ea /drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |
parent | d5473e225decc74f0d6bb015d06365dad15828d0 (diff) |
gpu: nvgpu: Reduce structure padding waste
The gk20a_init_fifo_setup_sw_common() function allocates memory of
channel_gk20a and tsg_gk20a structures for all 512 channels:
Size Caller Module Pages Type
749568 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=182 vmalloc
602112 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=146 vmalloc
This change simply reorganizes the member definitions in those two
structures to reduce padding waste. After this change:
Size Caller Module Pages Type
733184 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=178 vmalloc
585728 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=142 vmalloc
In summary, it saves 8 pages, i.e. 32KB of memory.
Bug 2327574
Bug 2284925
Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1803358
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 52 |
1 files changed, 26 insertions, 26 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 9f737192..7c3d950b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -197,7 +197,6 @@ struct channel_gk20a { | |||
197 | struct nvgpu_list_node free_chs; | 197 | struct nvgpu_list_node free_chs; |
198 | 198 | ||
199 | struct nvgpu_spinlock ref_obtain_lock; | 199 | struct nvgpu_spinlock ref_obtain_lock; |
200 | bool referenceable; | ||
201 | nvgpu_atomic_t ref_count; | 200 | nvgpu_atomic_t ref_count; |
202 | struct nvgpu_cond ref_count_dec_wq; | 201 | struct nvgpu_cond ref_count_dec_wq; |
203 | #if GK20A_CHANNEL_REFCOUNT_TRACKING | 202 | #if GK20A_CHANNEL_REFCOUNT_TRACKING |
@@ -214,19 +213,14 @@ struct channel_gk20a { | |||
214 | 213 | ||
215 | struct nvgpu_semaphore_int *hw_sema; | 214 | struct nvgpu_semaphore_int *hw_sema; |
216 | 215 | ||
217 | int chid; | ||
218 | nvgpu_atomic_t bound; | 216 | nvgpu_atomic_t bound; |
219 | bool vpr; | 217 | |
220 | bool deterministic; | 218 | int chid; |
221 | /* deterministic, but explicitly idle and submits disallowed */ | 219 | int tsgid; |
222 | bool deterministic_railgate_allowed; | ||
223 | bool cde; | ||
224 | bool usermode_submit_enabled; | ||
225 | pid_t pid; | 220 | pid_t pid; |
226 | pid_t tgid; | 221 | pid_t tgid; |
227 | struct nvgpu_mutex ioctl_lock; | 222 | struct nvgpu_mutex ioctl_lock; |
228 | 223 | ||
229 | int tsgid; | ||
230 | struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ | 224 | struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ |
231 | 225 | ||
232 | struct channel_gk20a_joblist joblist; | 226 | struct channel_gk20a_joblist joblist; |
@@ -242,16 +236,11 @@ struct channel_gk20a { | |||
242 | u64 userd_iova; | 236 | u64 userd_iova; |
243 | u64 userd_gpu_va; | 237 | u64 userd_gpu_va; |
244 | 238 | ||
245 | u32 obj_class; /* we support only one obj per channel */ | ||
246 | |||
247 | struct priv_cmd_queue priv_cmd_q; | 239 | struct priv_cmd_queue priv_cmd_q; |
248 | 240 | ||
249 | struct nvgpu_cond notifier_wq; | 241 | struct nvgpu_cond notifier_wq; |
250 | struct nvgpu_cond semaphore_wq; | 242 | struct nvgpu_cond semaphore_wq; |
251 | 243 | ||
252 | u32 timeout_accumulated_ms; | ||
253 | u32 timeout_gpfifo_get; | ||
254 | |||
255 | /* kernel watchdog to kill stuck jobs */ | 244 | /* kernel watchdog to kill stuck jobs */ |
256 | struct channel_gk20a_timeout timeout; | 245 | struct channel_gk20a_timeout timeout; |
257 | 246 | ||
@@ -271,32 +260,43 @@ struct channel_gk20a { | |||
271 | struct nvgpu_mutex dbg_s_lock; | 260 | struct nvgpu_mutex dbg_s_lock; |
272 | struct nvgpu_list_node dbg_s_list; | 261 | struct nvgpu_list_node dbg_s_list; |
273 | 262 | ||
274 | bool has_timedout; | ||
275 | u32 timeout_ms_max; | ||
276 | bool timeout_debug_dump; | ||
277 | |||
278 | struct nvgpu_mutex sync_lock; | 263 | struct nvgpu_mutex sync_lock; |
279 | struct gk20a_channel_sync *sync; | 264 | struct gk20a_channel_sync *sync; |
280 | struct gk20a_channel_sync *user_sync; | 265 | struct gk20a_channel_sync *user_sync; |
281 | 266 | ||
282 | bool has_os_fence_framework_support; | ||
283 | |||
284 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 267 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
285 | u64 virt_ctx; | 268 | u64 virt_ctx; |
286 | #endif | 269 | #endif |
287 | 270 | ||
288 | u32 runlist_id; | ||
289 | |||
290 | bool is_privileged_channel; | ||
291 | u32 subctx_id; | ||
292 | u32 runqueue_sel; | ||
293 | |||
294 | struct ctx_header_desc ctx_header; | 271 | struct ctx_header_desc ctx_header; |
295 | 272 | ||
296 | /* Any operating system specific data. */ | 273 | /* Any operating system specific data. */ |
297 | void *os_priv; | 274 | void *os_priv; |
298 | 275 | ||
276 | u32 obj_class; /* we support only one obj per channel */ | ||
277 | |||
278 | u32 timeout_accumulated_ms; | ||
279 | u32 timeout_gpfifo_get; | ||
280 | |||
281 | u32 subctx_id; | ||
282 | u32 runqueue_sel; | ||
283 | |||
284 | u32 timeout_ms_max; | ||
285 | u32 runlist_id; | ||
286 | |||
299 | bool mmu_nack_handled; | 287 | bool mmu_nack_handled; |
288 | bool has_timedout; | ||
289 | bool referenceable; | ||
290 | bool vpr; | ||
291 | bool deterministic; | ||
292 | /* deterministic, but explicitly idle and submits disallowed */ | ||
293 | bool deterministic_railgate_allowed; | ||
294 | bool cde; | ||
295 | bool usermode_submit_enabled; | ||
296 | bool timeout_debug_dump; | ||
297 | bool has_os_fence_framework_support; | ||
298 | |||
299 | bool is_privileged_channel; | ||
300 | }; | 300 | }; |
301 | 301 | ||
302 | static inline struct channel_gk20a * | 302 | static inline struct channel_gk20a * |