From 52305f0514d29e7fb2cb5e2154188e09faa3fe94 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 13 Aug 2018 20:22:56 -0700 Subject: gpu: nvgpu: Reduce structure padding waste The gk20a_init_fifo_setup_sw_common() function allocates memory for the channel_gk20a and tsg_gk20a structures for all 512 channels: Size Caller Module Pages Type 749568 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=182 vmalloc 602112 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=146 vmalloc This change simply reorganizes the member definitions in those two structures to reduce padding waste. After this change: Size Caller Module Pages Type 733184 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=178 vmalloc 585728 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=142 vmalloc In summary, it saves 8 pages, i.e. 32KB of memory. Bug 2327574 Bug 2284925 Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5 Signed-off-by: Nicolin Chen Reviewed-on: https://git-master.nvidia.com/r/1803358 Reviewed-by: svc-misra-checker Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 52 ++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.h') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 9f737192..7c3d950b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -197,7 +197,6 @@ struct channel_gk20a { struct nvgpu_list_node free_chs; struct nvgpu_spinlock ref_obtain_lock; - bool referenceable; nvgpu_atomic_t ref_count; struct nvgpu_cond ref_count_dec_wq; #if GK20A_CHANNEL_REFCOUNT_TRACKING @@ -214,19 +213,14 @@ struct channel_gk20a { struct nvgpu_semaphore_int *hw_sema; - int chid; nvgpu_atomic_t bound; - bool vpr; - bool deterministic; - /* deterministic, but explicitly idle and submits disallowed */ - bool 
deterministic_railgate_allowed; - bool cde; - bool usermode_submit_enabled; + + int chid; + int tsgid; pid_t pid; pid_t tgid; struct nvgpu_mutex ioctl_lock; - int tsgid; struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ struct channel_gk20a_joblist joblist; @@ -242,16 +236,11 @@ struct channel_gk20a { u64 userd_iova; u64 userd_gpu_va; - u32 obj_class; /* we support only one obj per channel */ - struct priv_cmd_queue priv_cmd_q; struct nvgpu_cond notifier_wq; struct nvgpu_cond semaphore_wq; - u32 timeout_accumulated_ms; - u32 timeout_gpfifo_get; - /* kernel watchdog to kill stuck jobs */ struct channel_gk20a_timeout timeout; @@ -271,32 +260,43 @@ struct channel_gk20a { struct nvgpu_mutex dbg_s_lock; struct nvgpu_list_node dbg_s_list; - bool has_timedout; - u32 timeout_ms_max; - bool timeout_debug_dump; - struct nvgpu_mutex sync_lock; struct gk20a_channel_sync *sync; struct gk20a_channel_sync *user_sync; - bool has_os_fence_framework_support; - #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION u64 virt_ctx; #endif - u32 runlist_id; - - bool is_privileged_channel; - u32 subctx_id; - u32 runqueue_sel; - struct ctx_header_desc ctx_header; /* Any operating system specific data. */ void *os_priv; + u32 obj_class; /* we support only one obj per channel */ + + u32 timeout_accumulated_ms; + u32 timeout_gpfifo_get; + + u32 subctx_id; + u32 runqueue_sel; + + u32 timeout_ms_max; + u32 runlist_id; + bool mmu_nack_handled; + bool has_timedout; + bool referenceable; + bool vpr; + bool deterministic; + /* deterministic, but explicitly idle and submits disallowed */ + bool deterministic_railgate_allowed; + bool cde; + bool usermode_submit_enabled; + bool timeout_debug_dump; + bool has_os_fence_framework_support; + + bool is_privileged_channel; }; static inline struct channel_gk20a * -- cgit v1.2.2