aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSebastian Sanchez <sebastian.sanchez@intel.com>2016-10-17 07:19:35 -0400
committerDoug Ledford <dledford@redhat.com>2016-11-15 16:37:27 -0500
commit6e768f0682e26e7683c4af5b0de04a2e08bc67db (patch)
tree21b4179a223198acf1f052854bfade590a8e35bf
parenta6cd5f08e085176fe8dbd4c57217058e53dfc4d9 (diff)
IB/hfi1: Optimize devdata cachelines
Profiling shows hot-path struct members that need to be in a minimum set of cachelines. Group these struct members in the same cacheline: sc2vl_lock, sc2vl, rhf_rcv_function_map, rcv_limit, rhf_offset. Group these struct members in the same cacheline: process_pio_send, process_dma_send, pport, rcd, int_counter, flags, num_pports, first_user_ctxt. Fill holes in struct hfi1_devdata revealed by pahole. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h108
1 files changed, 55 insertions, 53 deletions
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index a2ea643a79fc..25e44c4af995 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -852,32 +852,29 @@ struct hfi1_devdata {
852 u8 __iomem *kregend; 852 u8 __iomem *kregend;
853 /* physical address of chip for io_remap, etc. */ 853 /* physical address of chip for io_remap, etc. */
854 resource_size_t physaddr; 854 resource_size_t physaddr;
855 /* receive context data */ 855 /* Per VL data. Enough for all VLs but not all elements are set/used. */
856 struct hfi1_ctxtdata **rcd; 856 struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
857 /* send context data */ 857 /* send context data */
858 struct send_context_info *send_contexts; 858 struct send_context_info *send_contexts;
859 /* map hardware send contexts to software index */ 859 /* map hardware send contexts to software index */
860 u8 *hw_to_sw; 860 u8 *hw_to_sw;
861 /* spinlock for allocating and releasing send context resources */ 861 /* spinlock for allocating and releasing send context resources */
862 spinlock_t sc_lock; 862 spinlock_t sc_lock;
863 /* Per VL data. Enough for all VLs but not all elements are set/used. */
864 struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
865 /* lock for pio_map */ 863 /* lock for pio_map */
866 spinlock_t pio_map_lock; 864 spinlock_t pio_map_lock;
865 /* Send Context initialization lock. */
866 spinlock_t sc_init_lock;
867 /* lock for sdma_map */
868 spinlock_t sde_map_lock;
867 /* array of kernel send contexts */ 869 /* array of kernel send contexts */
868 struct send_context **kernel_send_context; 870 struct send_context **kernel_send_context;
869 /* array of vl maps */ 871 /* array of vl maps */
870 struct pio_vl_map __rcu *pio_map; 872 struct pio_vl_map __rcu *pio_map;
871 /* seqlock for sc2vl */ 873 /* default flags to last descriptor */
872 seqlock_t sc2vl_lock; 874 u64 default_desc1;
873 u64 sc2vl[4];
874 /* Send Context initialization lock. */
875 spinlock_t sc_init_lock;
876 875
877 /* fields common to all SDMA engines */ 876 /* fields common to all SDMA engines */
878 877
879 /* default flags to last descriptor */
880 u64 default_desc1;
881 volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */ 878 volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
882 dma_addr_t sdma_heads_phys; 879 dma_addr_t sdma_heads_phys;
883 void *sdma_pad_dma; /* DMA'ed by chip */ 880 void *sdma_pad_dma; /* DMA'ed by chip */
@@ -888,8 +885,6 @@ struct hfi1_devdata {
888 u32 chip_sdma_engines; 885 u32 chip_sdma_engines;
889 /* num used */ 886 /* num used */
890 u32 num_sdma; 887 u32 num_sdma;
891 /* lock for sdma_map */
892 spinlock_t sde_map_lock;
893 /* array of engines sized by num_sdma */ 888 /* array of engines sized by num_sdma */
894 struct sdma_engine *per_sdma; 889 struct sdma_engine *per_sdma;
895 /* array of vl maps */ 890 /* array of vl maps */
@@ -898,14 +893,11 @@ struct hfi1_devdata {
898 wait_queue_head_t sdma_unfreeze_wq; 893 wait_queue_head_t sdma_unfreeze_wq;
899 atomic_t sdma_unfreeze_count; 894 atomic_t sdma_unfreeze_count;
900 895
896 u32 lcb_access_count; /* count of LCB users */
897
901 /* common data between shared ASIC HFIs in this OS */ 898 /* common data between shared ASIC HFIs in this OS */
902 struct hfi1_asic_data *asic_data; 899 struct hfi1_asic_data *asic_data;
903 900
904 /* hfi1_pportdata, points to array of (physical) port-specific
905 * data structs, indexed by pidx (0..n-1)
906 */
907 struct hfi1_pportdata *pport;
908
909 /* mem-mapped pointer to base of PIO buffers */ 901 /* mem-mapped pointer to base of PIO buffers */
910 void __iomem *piobase; 902 void __iomem *piobase;
911 /* 903 /*
@@ -922,20 +914,13 @@ struct hfi1_devdata {
922 /* send context numbers and sizes for each type */ 914 /* send context numbers and sizes for each type */
923 struct sc_config_sizes sc_sizes[SC_MAX]; 915 struct sc_config_sizes sc_sizes[SC_MAX];
924 916
925 u32 lcb_access_count; /* count of LCB users */
926
927 char *boardname; /* human readable board info */ 917 char *boardname; /* human readable board info */
928 918
929 /* device (not port) flags, basically device capabilities */
930 u32 flags;
931
932 /* reset value */ 919 /* reset value */
933 u64 z_int_counter; 920 u64 z_int_counter;
934 u64 z_rcv_limit; 921 u64 z_rcv_limit;
935 u64 z_send_schedule; 922 u64 z_send_schedule;
936 /* percpu int_counter */ 923
937 u64 __percpu *int_counter;
938 u64 __percpu *rcv_limit;
939 u64 __percpu *send_schedule; 924 u64 __percpu *send_schedule;
940 /* number of receive contexts in use by the driver */ 925 /* number of receive contexts in use by the driver */
941 u32 num_rcv_contexts; 926 u32 num_rcv_contexts;
@@ -950,6 +935,7 @@ struct hfi1_devdata {
950 /* base receive interrupt timeout, in CSR units */ 935 /* base receive interrupt timeout, in CSR units */
951 u32 rcv_intr_timeout_csr; 936 u32 rcv_intr_timeout_csr;
952 937
938 u32 freezelen; /* max length of freezemsg */
953 u64 __iomem *egrtidbase; 939 u64 __iomem *egrtidbase;
954 spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ 940 spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
955 spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ 941 spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
@@ -971,7 +957,6 @@ struct hfi1_devdata {
971 * IB link status cheaply 957 * IB link status cheaply
972 */ 958 */
973 struct hfi1_status *status; 959 struct hfi1_status *status;
974 u32 freezelen; /* max length of freezemsg */
975 960
976 /* revision register shadow */ 961 /* revision register shadow */
977 u64 revision; 962 u64 revision;
@@ -999,6 +984,8 @@ struct hfi1_devdata {
999 u16 rcvegrbufsize_shift; 984 u16 rcvegrbufsize_shift;
1000 /* both sides of the PCIe link are gen3 capable */ 985 /* both sides of the PCIe link are gen3 capable */
1001 u8 link_gen3_capable; 986 u8 link_gen3_capable;
987 /* default link down value (poll/sleep) */
988 u8 link_default;
1002 /* localbus width (1, 2,4,8,16,32) from config space */ 989 /* localbus width (1, 2,4,8,16,32) from config space */
1003 u32 lbus_width; 990 u32 lbus_width;
1004 /* localbus speed in MHz */ 991 /* localbus speed in MHz */
@@ -1034,8 +1021,6 @@ struct hfi1_devdata {
1034 u8 hfi1_id; 1021 u8 hfi1_id;
1035 /* implementation code */ 1022 /* implementation code */
1036 u8 icode; 1023 u8 icode;
1037 /* default link down value (poll/sleep) */
1038 u8 link_default;
1039 /* vAU of this device */ 1024 /* vAU of this device */
1040 u8 vau; 1025 u8 vau;
1041 /* vCU of this device */ 1026 /* vCU of this device */
@@ -1046,27 +1031,17 @@ struct hfi1_devdata {
1046 u16 vl15_init; 1031 u16 vl15_init;
1047 1032
1048 /* Misc small ints */ 1033 /* Misc small ints */
1049 /* Number of physical ports available */
1050 u8 num_pports;
1051 /* Lowest context number which can be used by user processes */
1052 u8 first_user_ctxt;
1053 u8 n_krcv_queues; 1034 u8 n_krcv_queues;
1054 u8 qos_shift; 1035 u8 qos_shift;
1055 u8 qpn_mask;
1056 1036
1057 u16 rhf_offset; /* offset of RHF within receive header entry */
1058 u16 irev; /* implementation revision */ 1037 u16 irev; /* implementation revision */
1059 u16 dc8051_ver; /* 8051 firmware version */ 1038 u16 dc8051_ver; /* 8051 firmware version */
1060 1039
1040 spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
1061 struct platform_config platform_config; 1041 struct platform_config platform_config;
1062 struct platform_config_cache pcfg_cache; 1042 struct platform_config_cache pcfg_cache;
1063 1043
1064 struct diag_client *diag_client; 1044 struct diag_client *diag_client;
1065 spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
1066
1067 u8 psxmitwait_supported;
1068 /* cycle length of PS* counters in HW (in picoseconds) */
1069 u16 psxmitwait_check_rate;
1070 1045
1071 /* MSI-X information */ 1046 /* MSI-X information */
1072 struct hfi1_msix_entry *msix_entries; 1047 struct hfi1_msix_entry *msix_entries;
@@ -1081,6 +1056,9 @@ struct hfi1_devdata {
1081 1056
1082 struct rcv_array_data rcv_entries; 1057 struct rcv_array_data rcv_entries;
1083 1058
1059 /* cycle length of PS* counters in HW (in picoseconds) */
1060 u16 psxmitwait_check_rate;
1061
1084 /* 1062 /*
1085 * 64 bit synthetic counters 1063 * 64 bit synthetic counters
1086 */ 1064 */
@@ -1113,11 +1091,11 @@ struct hfi1_devdata {
1113 struct err_info_rcvport err_info_rcvport; 1091 struct err_info_rcvport err_info_rcvport;
1114 struct err_info_constraint err_info_rcv_constraint; 1092 struct err_info_constraint err_info_rcv_constraint;
1115 struct err_info_constraint err_info_xmit_constraint; 1093 struct err_info_constraint err_info_xmit_constraint;
1116 u8 err_info_uncorrectable;
1117 u8 err_info_fmconfig;
1118 1094
1119 atomic_t drop_packet; 1095 atomic_t drop_packet;
1120 u8 do_drop; 1096 u8 do_drop;
1097 u8 err_info_uncorrectable;
1098 u8 err_info_fmconfig;
1121 1099
1122 /* 1100 /*
1123 * Software counters for the status bits defined by the 1101 * Software counters for the status bits defined by the
@@ -1140,47 +1118,71 @@ struct hfi1_devdata {
1140 u64 sw_cce_err_status_aggregate; 1118 u64 sw_cce_err_status_aggregate;
1141 /* Software counter that aggregates all bypass packet rcv errors */ 1119 /* Software counter that aggregates all bypass packet rcv errors */
1142 u64 sw_rcv_bypass_packet_errors; 1120 u64 sw_rcv_bypass_packet_errors;
1143 /* receive interrupt functions */ 1121 /* receive interrupt function */
1144 rhf_rcv_function_ptr *rhf_rcv_function_map;
1145 rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; 1122 rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
1146 1123
1124 /* Save the enabled LCB error bits */
1125 u64 lcb_err_en;
1126
1147 /* 1127 /*
1148 * Handlers for outgoing data so that snoop/capture does not 1128 * Handlers for outgoing data so that snoop/capture does not
1149 * have to have its hooks in the send path 1129 * have to have its hooks in the send path
1150 */ 1130 */
1151 send_routine process_pio_send; 1131 send_routine process_pio_send ____cacheline_aligned_in_smp;
1152 send_routine process_dma_send; 1132 send_routine process_dma_send;
1153 void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, 1133 void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
1154 u64 pbc, const void *from, size_t count); 1134 u64 pbc, const void *from, size_t count);
1135 /* hfi1_pportdata, points to array of (physical) port-specific
1136 * data structs, indexed by pidx (0..n-1)
1137 */
1138 struct hfi1_pportdata *pport;
1139 /* receive context data */
1140 struct hfi1_ctxtdata **rcd;
1141 u64 __percpu *int_counter;
1142 /* device (not port) flags, basically device capabilities */
1143 u16 flags;
1144 /* Number of physical ports available */
1145 u8 num_pports;
1146 /* Lowest context number which can be used by user processes */
1147 u8 first_user_ctxt;
1148 /* adding a new field here would make it part of this cacheline */
1149
1150 /* seqlock for sc2vl */
1151 seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
1152 u64 sc2vl[4];
1153 /* receive interrupt functions */
1154 rhf_rcv_function_ptr *rhf_rcv_function_map;
1155 u64 __percpu *rcv_limit;
1156 u16 rhf_offset; /* offset of RHF within receive header entry */
1157 /* adding a new field here would make it part of this cacheline */
1155 1158
1156 /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ 1159 /* OUI comes from the HW. Used everywhere as 3 separate bytes. */
1157 u8 oui1; 1160 u8 oui1;
1158 u8 oui2; 1161 u8 oui2;
1159 u8 oui3; 1162 u8 oui3;
1163 u8 dc_shutdown;
1164
1160 /* Timer and counter used to detect RcvBufOvflCnt changes */ 1165 /* Timer and counter used to detect RcvBufOvflCnt changes */
1161 struct timer_list rcverr_timer; 1166 struct timer_list rcverr_timer;
1162 u32 rcv_ovfl_cnt;
1163 1167
1164 wait_queue_head_t event_queue; 1168 wait_queue_head_t event_queue;
1165 1169
1166 /* Save the enabled LCB error bits */
1167 u64 lcb_err_en;
1168 u8 dc_shutdown;
1169
1170 /* receive context tail dummy address */ 1170 /* receive context tail dummy address */
1171 __le64 *rcvhdrtail_dummy_kvaddr; 1171 __le64 *rcvhdrtail_dummy_kvaddr;
1172 dma_addr_t rcvhdrtail_dummy_dma; 1172 dma_addr_t rcvhdrtail_dummy_dma;
1173 1173
1174 bool eprom_available; /* true if EPROM is available for this device */ 1174 u32 rcv_ovfl_cnt;
1175 bool aspm_supported; /* Does HW support ASPM */
1176 bool aspm_enabled; /* ASPM state: enabled/disabled */
1177 /* Serialize ASPM enable/disable between multiple verbs contexts */ 1175 /* Serialize ASPM enable/disable between multiple verbs contexts */
1178 spinlock_t aspm_lock; 1176 spinlock_t aspm_lock;
1179 /* Number of verbs contexts which have disabled ASPM */ 1177 /* Number of verbs contexts which have disabled ASPM */
1180 atomic_t aspm_disabled_cnt; 1178 atomic_t aspm_disabled_cnt;
1181 1179
1182 struct hfi1_affinity *affinity; 1180 struct hfi1_affinity *affinity;
1181 bool eprom_available; /* true if EPROM is available for this device */
1182 bool aspm_supported; /* Does HW support ASPM */
1183 bool aspm_enabled; /* ASPM state: enabled/disabled */
1183 struct rhashtable sdma_rht; 1184 struct rhashtable sdma_rht;
1185
1184 struct kobject kobj; 1186 struct kobject kobj;
1185}; 1187};
1186 1188