From 52305f0514d29e7fb2cb5e2154188e09faa3fe94 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 13 Aug 2018 20:22:56 -0700 Subject: gpu: nvgpu: Reduce structure padding waste The gk20a_init_fifo_setup_sw_common() function allocates memory of channel_gk20a and tsg_gk20a structures for all 512 channels: Size Caller Module Pages Type 749568 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=182 vmalloc 602112 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=146 vmalloc This change simply reorganizes the member definitions in those two structures to reduce padding waste. After this change: Size Caller Module Pages Type 733184 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=178 vmalloc 585728 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=142 vmalloc In summary, it saves 8 pages, i.e. 32KB of memory. Bug 2327574 Bug 2284925 Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5 Signed-off-by: Nicolin Chen Reviewed-on: https://git-master.nvidia.com/r/1803358 Reviewed-by: svc-misra-checker Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 52 ++++++++++++++++----------------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 +-- drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 19 ++++++------ 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 9f737192..7c3d950b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -197,7 +197,6 @@ struct channel_gk20a { struct nvgpu_list_node free_chs; struct nvgpu_spinlock ref_obtain_lock; - bool referenceable; nvgpu_atomic_t ref_count; struct nvgpu_cond ref_count_dec_wq; #if GK20A_CHANNEL_REFCOUNT_TRACKING @@ -214,19 +213,14 @@ struct channel_gk20a { struct nvgpu_semaphore_int *hw_sema; - int chid; nvgpu_atomic_t bound; - bool vpr; - bool deterministic; - /* deterministic, but 
explicitly idle and submits disallowed */ - bool deterministic_railgate_allowed; - bool cde; - bool usermode_submit_enabled; + + int chid; + int tsgid; pid_t pid; pid_t tgid; struct nvgpu_mutex ioctl_lock; - int tsgid; struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ struct channel_gk20a_joblist joblist; @@ -242,16 +236,11 @@ struct channel_gk20a { u64 userd_iova; u64 userd_gpu_va; - u32 obj_class; /* we support only one obj per channel */ - struct priv_cmd_queue priv_cmd_q; struct nvgpu_cond notifier_wq; struct nvgpu_cond semaphore_wq; - u32 timeout_accumulated_ms; - u32 timeout_gpfifo_get; - /* kernel watchdog to kill stuck jobs */ struct channel_gk20a_timeout timeout; @@ -271,32 +260,43 @@ struct channel_gk20a { struct nvgpu_mutex dbg_s_lock; struct nvgpu_list_node dbg_s_list; - bool has_timedout; - u32 timeout_ms_max; - bool timeout_debug_dump; - struct nvgpu_mutex sync_lock; struct gk20a_channel_sync *sync; struct gk20a_channel_sync *user_sync; - bool has_os_fence_framework_support; - #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION u64 virt_ctx; #endif - u32 runlist_id; - - bool is_privileged_channel; - u32 subctx_id; - u32 runqueue_sel; - struct ctx_header_desc ctx_header; /* Any operating system specific data. 
*/ void *os_priv; + u32 obj_class; /* we support only one obj per channel */ + + u32 timeout_accumulated_ms; + u32 timeout_gpfifo_get; + + u32 subctx_id; + u32 runqueue_sel; + + u32 timeout_ms_max; + u32 runlist_id; + bool mmu_nack_handled; + bool has_timedout; + bool referenceable; + bool vpr; + bool deterministic; + /* deterministic, but explicitly idle and submits disallowed */ + bool deterministic_railgate_allowed; + bool cde; + bool usermode_submit_enabled; + bool timeout_debug_dump; + bool has_os_fence_framework_support; + + bool is_privileged_channel; }; static inline struct channel_gk20a * diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index a60f6f12..3fc7e55f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -453,7 +453,6 @@ struct nvgpu_gr_ctx { u32 graphics_preempt_mode; u32 compute_preempt_mode; - bool boosted_ctx; struct nvgpu_mem preempt_ctxsw_buffer; struct nvgpu_mem spill_ctxsw_buffer; @@ -462,11 +461,12 @@ struct nvgpu_gr_ctx { u32 ctx_id; bool ctx_id_valid; bool cilp_preempt_pending; + bool boosted_ctx; + bool golden_img_loaded; #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION u64 virt_ctx; #endif - bool golden_img_loaded; struct patch_desc patch_ctx; struct zcull_ctx_desc zcull_ctx; diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 2f76477f..552c3bb3 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h @@ -42,34 +42,33 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch); struct tsg_gk20a { struct gk20a *g; - bool in_use; - int tsgid; + struct vm_gk20a *vm; + struct nvgpu_mem *eng_method_buffers; + + struct nvgpu_gr_ctx gr_ctx; struct nvgpu_ref refcount; struct nvgpu_list_node ch_list; - int num_active_channels; + struct nvgpu_list_node event_id_list; struct nvgpu_rwsem ch_list_lock; + struct nvgpu_mutex event_id_list_lock; + int num_active_channels; unsigned int timeslice_us; unsigned 
int timeslice_timeout; unsigned int timeslice_scale; - struct vm_gk20a *vm; - u32 interleave_level; - - struct nvgpu_list_node event_id_list; - struct nvgpu_mutex event_id_list_lock; + int tsgid; u32 runlist_id; pid_t tgid; - struct nvgpu_mem *eng_method_buffers; u32 num_active_tpcs; u8 tpc_pg_enabled; bool tpc_num_initialized; + bool in_use; - struct nvgpu_gr_ctx gr_ctx; }; int gk20a_enable_tsg(struct tsg_gk20a *tsg); -- cgit v1.2.2