diff options
author | Sebastian Sanchez <sebastian.sanchez@intel.com> | 2016-10-17 07:19:35 -0400 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-11-15 16:37:27 -0500 |
commit | 6e768f0682e26e7683c4af5b0de04a2e08bc67db (patch) | |
tree | 21b4179a223198acf1f052854bfade590a8e35bf | |
parent | a6cd5f08e085176fe8dbd4c57217058e53dfc4d9 (diff) |
IB/hfi1: Optimize devdata cachelines
Profiling shows that hot-path struct members need to be
kept within a minimal set of cachelines.
Group these struct members in the same cacheline:
sc2vl_lock
sc2vl
rhf_rcv_function_map
rcv_limit
rhf_offset
Group these struct members in the same cacheline:
process_pio_send
process_dma_send
pport
rcd
int_counter
flags
num_pports
first_user_ctxt
Fill holes in struct hfi1_devdata revealed by pahole.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r-- | drivers/infiniband/hw/hfi1/hfi.h | 108 |
1 files changed, 55 insertions, 53 deletions
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index a2ea643a79fc..25e44c4af995 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h | |||
@@ -852,32 +852,29 @@ struct hfi1_devdata { | |||
852 | u8 __iomem *kregend; | 852 | u8 __iomem *kregend; |
853 | /* physical address of chip for io_remap, etc. */ | 853 | /* physical address of chip for io_remap, etc. */ |
854 | resource_size_t physaddr; | 854 | resource_size_t physaddr; |
855 | /* receive context data */ | 855 | /* Per VL data. Enough for all VLs but not all elements are set/used. */ |
856 | struct hfi1_ctxtdata **rcd; | 856 | struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; |
857 | /* send context data */ | 857 | /* send context data */ |
858 | struct send_context_info *send_contexts; | 858 | struct send_context_info *send_contexts; |
859 | /* map hardware send contexts to software index */ | 859 | /* map hardware send contexts to software index */ |
860 | u8 *hw_to_sw; | 860 | u8 *hw_to_sw; |
861 | /* spinlock for allocating and releasing send context resources */ | 861 | /* spinlock for allocating and releasing send context resources */ |
862 | spinlock_t sc_lock; | 862 | spinlock_t sc_lock; |
863 | /* Per VL data. Enough for all VLs but not all elements are set/used. */ | ||
864 | struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; | ||
865 | /* lock for pio_map */ | 863 | /* lock for pio_map */ |
866 | spinlock_t pio_map_lock; | 864 | spinlock_t pio_map_lock; |
865 | /* Send Context initialization lock. */ | ||
866 | spinlock_t sc_init_lock; | ||
867 | /* lock for sdma_map */ | ||
868 | spinlock_t sde_map_lock; | ||
867 | /* array of kernel send contexts */ | 869 | /* array of kernel send contexts */ |
868 | struct send_context **kernel_send_context; | 870 | struct send_context **kernel_send_context; |
869 | /* array of vl maps */ | 871 | /* array of vl maps */ |
870 | struct pio_vl_map __rcu *pio_map; | 872 | struct pio_vl_map __rcu *pio_map; |
871 | /* seqlock for sc2vl */ | 873 | /* default flags to last descriptor */ |
872 | seqlock_t sc2vl_lock; | 874 | u64 default_desc1; |
873 | u64 sc2vl[4]; | ||
874 | /* Send Context initialization lock. */ | ||
875 | spinlock_t sc_init_lock; | ||
876 | 875 | ||
877 | /* fields common to all SDMA engines */ | 876 | /* fields common to all SDMA engines */ |
878 | 877 | ||
879 | /* default flags to last descriptor */ | ||
880 | u64 default_desc1; | ||
881 | volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */ | 878 | volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */ |
882 | dma_addr_t sdma_heads_phys; | 879 | dma_addr_t sdma_heads_phys; |
883 | void *sdma_pad_dma; /* DMA'ed by chip */ | 880 | void *sdma_pad_dma; /* DMA'ed by chip */ |
@@ -888,8 +885,6 @@ struct hfi1_devdata { | |||
888 | u32 chip_sdma_engines; | 885 | u32 chip_sdma_engines; |
889 | /* num used */ | 886 | /* num used */ |
890 | u32 num_sdma; | 887 | u32 num_sdma; |
891 | /* lock for sdma_map */ | ||
892 | spinlock_t sde_map_lock; | ||
893 | /* array of engines sized by num_sdma */ | 888 | /* array of engines sized by num_sdma */ |
894 | struct sdma_engine *per_sdma; | 889 | struct sdma_engine *per_sdma; |
895 | /* array of vl maps */ | 890 | /* array of vl maps */ |
@@ -898,14 +893,11 @@ struct hfi1_devdata { | |||
898 | wait_queue_head_t sdma_unfreeze_wq; | 893 | wait_queue_head_t sdma_unfreeze_wq; |
899 | atomic_t sdma_unfreeze_count; | 894 | atomic_t sdma_unfreeze_count; |
900 | 895 | ||
896 | u32 lcb_access_count; /* count of LCB users */ | ||
897 | |||
901 | /* common data between shared ASIC HFIs in this OS */ | 898 | /* common data between shared ASIC HFIs in this OS */ |
902 | struct hfi1_asic_data *asic_data; | 899 | struct hfi1_asic_data *asic_data; |
903 | 900 | ||
904 | /* hfi1_pportdata, points to array of (physical) port-specific | ||
905 | * data structs, indexed by pidx (0..n-1) | ||
906 | */ | ||
907 | struct hfi1_pportdata *pport; | ||
908 | |||
909 | /* mem-mapped pointer to base of PIO buffers */ | 901 | /* mem-mapped pointer to base of PIO buffers */ |
910 | void __iomem *piobase; | 902 | void __iomem *piobase; |
911 | /* | 903 | /* |
@@ -922,20 +914,13 @@ struct hfi1_devdata { | |||
922 | /* send context numbers and sizes for each type */ | 914 | /* send context numbers and sizes for each type */ |
923 | struct sc_config_sizes sc_sizes[SC_MAX]; | 915 | struct sc_config_sizes sc_sizes[SC_MAX]; |
924 | 916 | ||
925 | u32 lcb_access_count; /* count of LCB users */ | ||
926 | |||
927 | char *boardname; /* human readable board info */ | 917 | char *boardname; /* human readable board info */ |
928 | 918 | ||
929 | /* device (not port) flags, basically device capabilities */ | ||
930 | u32 flags; | ||
931 | |||
932 | /* reset value */ | 919 | /* reset value */ |
933 | u64 z_int_counter; | 920 | u64 z_int_counter; |
934 | u64 z_rcv_limit; | 921 | u64 z_rcv_limit; |
935 | u64 z_send_schedule; | 922 | u64 z_send_schedule; |
936 | /* percpu int_counter */ | 923 | |
937 | u64 __percpu *int_counter; | ||
938 | u64 __percpu *rcv_limit; | ||
939 | u64 __percpu *send_schedule; | 924 | u64 __percpu *send_schedule; |
940 | /* number of receive contexts in use by the driver */ | 925 | /* number of receive contexts in use by the driver */ |
941 | u32 num_rcv_contexts; | 926 | u32 num_rcv_contexts; |
@@ -950,6 +935,7 @@ struct hfi1_devdata { | |||
950 | /* base receive interrupt timeout, in CSR units */ | 935 | /* base receive interrupt timeout, in CSR units */ |
951 | u32 rcv_intr_timeout_csr; | 936 | u32 rcv_intr_timeout_csr; |
952 | 937 | ||
938 | u32 freezelen; /* max length of freezemsg */ | ||
953 | u64 __iomem *egrtidbase; | 939 | u64 __iomem *egrtidbase; |
954 | spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ | 940 | spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ |
955 | spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ | 941 | spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ |
@@ -971,7 +957,6 @@ struct hfi1_devdata { | |||
971 | * IB link status cheaply | 957 | * IB link status cheaply |
972 | */ | 958 | */ |
973 | struct hfi1_status *status; | 959 | struct hfi1_status *status; |
974 | u32 freezelen; /* max length of freezemsg */ | ||
975 | 960 | ||
976 | /* revision register shadow */ | 961 | /* revision register shadow */ |
977 | u64 revision; | 962 | u64 revision; |
@@ -999,6 +984,8 @@ struct hfi1_devdata { | |||
999 | u16 rcvegrbufsize_shift; | 984 | u16 rcvegrbufsize_shift; |
1000 | /* both sides of the PCIe link are gen3 capable */ | 985 | /* both sides of the PCIe link are gen3 capable */ |
1001 | u8 link_gen3_capable; | 986 | u8 link_gen3_capable; |
987 | /* default link down value (poll/sleep) */ | ||
988 | u8 link_default; | ||
1002 | /* localbus width (1, 2,4,8,16,32) from config space */ | 989 | /* localbus width (1, 2,4,8,16,32) from config space */ |
1003 | u32 lbus_width; | 990 | u32 lbus_width; |
1004 | /* localbus speed in MHz */ | 991 | /* localbus speed in MHz */ |
@@ -1034,8 +1021,6 @@ struct hfi1_devdata { | |||
1034 | u8 hfi1_id; | 1021 | u8 hfi1_id; |
1035 | /* implementation code */ | 1022 | /* implementation code */ |
1036 | u8 icode; | 1023 | u8 icode; |
1037 | /* default link down value (poll/sleep) */ | ||
1038 | u8 link_default; | ||
1039 | /* vAU of this device */ | 1024 | /* vAU of this device */ |
1040 | u8 vau; | 1025 | u8 vau; |
1041 | /* vCU of this device */ | 1026 | /* vCU of this device */ |
@@ -1046,27 +1031,17 @@ struct hfi1_devdata { | |||
1046 | u16 vl15_init; | 1031 | u16 vl15_init; |
1047 | 1032 | ||
1048 | /* Misc small ints */ | 1033 | /* Misc small ints */ |
1049 | /* Number of physical ports available */ | ||
1050 | u8 num_pports; | ||
1051 | /* Lowest context number which can be used by user processes */ | ||
1052 | u8 first_user_ctxt; | ||
1053 | u8 n_krcv_queues; | 1034 | u8 n_krcv_queues; |
1054 | u8 qos_shift; | 1035 | u8 qos_shift; |
1055 | u8 qpn_mask; | ||
1056 | 1036 | ||
1057 | u16 rhf_offset; /* offset of RHF within receive header entry */ | ||
1058 | u16 irev; /* implementation revision */ | 1037 | u16 irev; /* implementation revision */ |
1059 | u16 dc8051_ver; /* 8051 firmware version */ | 1038 | u16 dc8051_ver; /* 8051 firmware version */ |
1060 | 1039 | ||
1040 | spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ | ||
1061 | struct platform_config platform_config; | 1041 | struct platform_config platform_config; |
1062 | struct platform_config_cache pcfg_cache; | 1042 | struct platform_config_cache pcfg_cache; |
1063 | 1043 | ||
1064 | struct diag_client *diag_client; | 1044 | struct diag_client *diag_client; |
1065 | spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ | ||
1066 | |||
1067 | u8 psxmitwait_supported; | ||
1068 | /* cycle length of PS* counters in HW (in picoseconds) */ | ||
1069 | u16 psxmitwait_check_rate; | ||
1070 | 1045 | ||
1071 | /* MSI-X information */ | 1046 | /* MSI-X information */ |
1072 | struct hfi1_msix_entry *msix_entries; | 1047 | struct hfi1_msix_entry *msix_entries; |
@@ -1081,6 +1056,9 @@ struct hfi1_devdata { | |||
1081 | 1056 | ||
1082 | struct rcv_array_data rcv_entries; | 1057 | struct rcv_array_data rcv_entries; |
1083 | 1058 | ||
1059 | /* cycle length of PS* counters in HW (in picoseconds) */ | ||
1060 | u16 psxmitwait_check_rate; | ||
1061 | |||
1084 | /* | 1062 | /* |
1085 | * 64 bit synthetic counters | 1063 | * 64 bit synthetic counters |
1086 | */ | 1064 | */ |
@@ -1113,11 +1091,11 @@ struct hfi1_devdata { | |||
1113 | struct err_info_rcvport err_info_rcvport; | 1091 | struct err_info_rcvport err_info_rcvport; |
1114 | struct err_info_constraint err_info_rcv_constraint; | 1092 | struct err_info_constraint err_info_rcv_constraint; |
1115 | struct err_info_constraint err_info_xmit_constraint; | 1093 | struct err_info_constraint err_info_xmit_constraint; |
1116 | u8 err_info_uncorrectable; | ||
1117 | u8 err_info_fmconfig; | ||
1118 | 1094 | ||
1119 | atomic_t drop_packet; | 1095 | atomic_t drop_packet; |
1120 | u8 do_drop; | 1096 | u8 do_drop; |
1097 | u8 err_info_uncorrectable; | ||
1098 | u8 err_info_fmconfig; | ||
1121 | 1099 | ||
1122 | /* | 1100 | /* |
1123 | * Software counters for the status bits defined by the | 1101 | * Software counters for the status bits defined by the |
@@ -1140,47 +1118,71 @@ struct hfi1_devdata { | |||
1140 | u64 sw_cce_err_status_aggregate; | 1118 | u64 sw_cce_err_status_aggregate; |
1141 | /* Software counter that aggregates all bypass packet rcv errors */ | 1119 | /* Software counter that aggregates all bypass packet rcv errors */ |
1142 | u64 sw_rcv_bypass_packet_errors; | 1120 | u64 sw_rcv_bypass_packet_errors; |
1143 | /* receive interrupt functions */ | 1121 | /* receive interrupt function */ |
1144 | rhf_rcv_function_ptr *rhf_rcv_function_map; | ||
1145 | rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; | 1122 | rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; |
1146 | 1123 | ||
1124 | /* Save the enabled LCB error bits */ | ||
1125 | u64 lcb_err_en; | ||
1126 | |||
1147 | /* | 1127 | /* |
1148 | * Handlers for outgoing data so that snoop/capture does not | 1128 | * Handlers for outgoing data so that snoop/capture does not |
1149 | * have to have its hooks in the send path | 1129 | * have to have its hooks in the send path |
1150 | */ | 1130 | */ |
1151 | send_routine process_pio_send; | 1131 | send_routine process_pio_send ____cacheline_aligned_in_smp; |
1152 | send_routine process_dma_send; | 1132 | send_routine process_dma_send; |
1153 | void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, | 1133 | void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, |
1154 | u64 pbc, const void *from, size_t count); | 1134 | u64 pbc, const void *from, size_t count); |
1135 | /* hfi1_pportdata, points to array of (physical) port-specific | ||
1136 | * data structs, indexed by pidx (0..n-1) | ||
1137 | */ | ||
1138 | struct hfi1_pportdata *pport; | ||
1139 | /* receive context data */ | ||
1140 | struct hfi1_ctxtdata **rcd; | ||
1141 | u64 __percpu *int_counter; | ||
1142 | /* device (not port) flags, basically device capabilities */ | ||
1143 | u16 flags; | ||
1144 | /* Number of physical ports available */ | ||
1145 | u8 num_pports; | ||
1146 | /* Lowest context number which can be used by user processes */ | ||
1147 | u8 first_user_ctxt; | ||
1148 | /* adding a new field here would make it part of this cacheline */ | ||
1149 | |||
1150 | /* seqlock for sc2vl */ | ||
1151 | seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; | ||
1152 | u64 sc2vl[4]; | ||
1153 | /* receive interrupt functions */ | ||
1154 | rhf_rcv_function_ptr *rhf_rcv_function_map; | ||
1155 | u64 __percpu *rcv_limit; | ||
1156 | u16 rhf_offset; /* offset of RHF within receive header entry */ | ||
1157 | /* adding a new field here would make it part of this cacheline */ | ||
1155 | 1158 | ||
1156 | /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ | 1159 | /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ |
1157 | u8 oui1; | 1160 | u8 oui1; |
1158 | u8 oui2; | 1161 | u8 oui2; |
1159 | u8 oui3; | 1162 | u8 oui3; |
1163 | u8 dc_shutdown; | ||
1164 | |||
1160 | /* Timer and counter used to detect RcvBufOvflCnt changes */ | 1165 | /* Timer and counter used to detect RcvBufOvflCnt changes */ |
1161 | struct timer_list rcverr_timer; | 1166 | struct timer_list rcverr_timer; |
1162 | u32 rcv_ovfl_cnt; | ||
1163 | 1167 | ||
1164 | wait_queue_head_t event_queue; | 1168 | wait_queue_head_t event_queue; |
1165 | 1169 | ||
1166 | /* Save the enabled LCB error bits */ | ||
1167 | u64 lcb_err_en; | ||
1168 | u8 dc_shutdown; | ||
1169 | |||
1170 | /* receive context tail dummy address */ | 1170 | /* receive context tail dummy address */ |
1171 | __le64 *rcvhdrtail_dummy_kvaddr; | 1171 | __le64 *rcvhdrtail_dummy_kvaddr; |
1172 | dma_addr_t rcvhdrtail_dummy_dma; | 1172 | dma_addr_t rcvhdrtail_dummy_dma; |
1173 | 1173 | ||
1174 | bool eprom_available; /* true if EPROM is available for this device */ | 1174 | u32 rcv_ovfl_cnt; |
1175 | bool aspm_supported; /* Does HW support ASPM */ | ||
1176 | bool aspm_enabled; /* ASPM state: enabled/disabled */ | ||
1177 | /* Serialize ASPM enable/disable between multiple verbs contexts */ | 1175 | /* Serialize ASPM enable/disable between multiple verbs contexts */ |
1178 | spinlock_t aspm_lock; | 1176 | spinlock_t aspm_lock; |
1179 | /* Number of verbs contexts which have disabled ASPM */ | 1177 | /* Number of verbs contexts which have disabled ASPM */ |
1180 | atomic_t aspm_disabled_cnt; | 1178 | atomic_t aspm_disabled_cnt; |
1181 | 1179 | ||
1182 | struct hfi1_affinity *affinity; | 1180 | struct hfi1_affinity *affinity; |
1181 | bool eprom_available; /* true if EPROM is available for this device */ | ||
1182 | bool aspm_supported; /* Does HW support ASPM */ | ||
1183 | bool aspm_enabled; /* ASPM state: enabled/disabled */ | ||
1183 | struct rhashtable sdma_rht; | 1184 | struct rhashtable sdma_rht; |
1185 | |||
1184 | struct kobject kobj; | 1186 | struct kobject kobj; |
1185 | }; | 1187 | }; |
1186 | 1188 | ||