aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Brett Creeley <brett.creeley@intel.com> 2019-02-19 18:04:05 -0500
committer Jeff Kirsher <jeffrey.t.kirsher@intel.com> 2019-03-26 18:03:25 -0400
commit 8244dd2d23b251dcba3238e42216e9277beb5729 (patch)
tree 2daf864216a31bd9cea13bbd02ecc936652b75eb /drivers
parent 89f3e4a5b762db66de94c44cfea11195f9d549b3 (diff)
ice: Audit hotpath structures with pahole
Currently the ice_q_vector structure and ice_ring_container structure are taking up more space than necessary due to cache alignment holes and unnecessary variables respectively. This is not helping the driver's performance. The following fixes were made to improve cache alignment, reduce wasted space, and increase performance. 1. Remove the ice_latency_range enum as it is unused. 2. Remove the latency_range variable in the ice_ring_container structure. 3. Change the size of the itr_idx in the ice_ring_container structure from an int to a u16. This reduced the size of the ice_ring_container structure to 32 Bytes so it has no holes or padding. 4. Re-arrange the ice_q_vector structure using pahole to align members as best as possible with regard to the 64 Byte cache line size. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/net/ethernet/intel/ice/ice.h 21
-rw-r--r-- drivers/net/ethernet/intel/ice/ice_lib.c 2
-rw-r--r-- drivers/net/ethernet/intel/ice/ice_txrx.c 2
-rw-r--r-- drivers/net/ethernet/intel/ice/ice_txrx.h 10
4 files changed, 15 insertions, 20 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 7609cccb251e..b819689da7e2 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -294,20 +294,25 @@ struct ice_vsi {
294/* struct that defines an interrupt vector */ 294/* struct that defines an interrupt vector */
295struct ice_q_vector { 295struct ice_q_vector {
296 struct ice_vsi *vsi; 296 struct ice_vsi *vsi;
297 cpumask_t affinity_mask; 297
298 struct napi_struct napi;
299 struct ice_ring_container rx;
300 struct ice_ring_container tx;
301 struct irq_affinity_notify affinity_notify;
302 u16 v_idx; /* index in the vsi->q_vector array. */ 298 u16 v_idx; /* index in the vsi->q_vector array. */
303 u8 num_ring_tx; /* total number of Tx rings in vector */
304 u8 num_ring_rx; /* total number of Rx rings in vector */ 299 u8 num_ring_rx; /* total number of Rx rings in vector */
305 char name[ICE_INT_NAME_STR_LEN]; 300 u8 num_ring_tx; /* total number of Tx rings in vector */
301 u8 itr_countdown; /* when 0 should adjust adaptive ITR */
306 /* in usecs, need to use ice_intrl_to_usecs_reg() before writing this 302 /* in usecs, need to use ice_intrl_to_usecs_reg() before writing this
307 * value to the device 303 * value to the device
308 */ 304 */
309 u8 intrl; 305 u8 intrl;
310 u8 itr_countdown; /* when 0 should adjust adaptive ITR */ 306
307 struct napi_struct napi;
308
309 struct ice_ring_container rx;
310 struct ice_ring_container tx;
311
312 cpumask_t affinity_mask;
313 struct irq_affinity_notify affinity_notify;
314
315 char name[ICE_INT_NAME_STR_LEN];
311} ____cacheline_internodealigned_in_smp; 316} ____cacheline_internodealigned_in_smp;
312 317
313enum ice_pf_flags { 318enum ice_pf_flags {
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index a64db22e6ba4..bf0160b6d6ac 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1820,7 +1820,6 @@ ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector, u16 vector)
1820 rc->target_itr = ITR_TO_REG(rc->itr_setting); 1820 rc->target_itr = ITR_TO_REG(rc->itr_setting);
1821 rc->next_update = jiffies + 1; 1821 rc->next_update = jiffies + 1;
1822 rc->current_itr = rc->target_itr; 1822 rc->current_itr = rc->target_itr;
1823 rc->latency_range = ICE_LOW_LATENCY;
1824 wr32(hw, GLINT_ITR(rc->itr_idx, vector), 1823 wr32(hw, GLINT_ITR(rc->itr_idx, vector),
1825 ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); 1824 ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
1826 } 1825 }
@@ -1835,7 +1834,6 @@ ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector, u16 vector)
1835 rc->target_itr = ITR_TO_REG(rc->itr_setting); 1834 rc->target_itr = ITR_TO_REG(rc->itr_setting);
1836 rc->next_update = jiffies + 1; 1835 rc->next_update = jiffies + 1;
1837 rc->current_itr = rc->target_itr; 1836 rc->current_itr = rc->target_itr;
1838 rc->latency_range = ICE_LOW_LATENCY;
1839 wr32(hw, GLINT_ITR(rc->itr_idx, vector), 1837 wr32(hw, GLINT_ITR(rc->itr_idx, vector),
1840 ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); 1838 ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
1841 } 1839 }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index dfd7fa06ed22..9a80e9ec3f10 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1323,7 +1323,7 @@ clear_counts:
1323 * @itr_idx: interrupt throttling index 1323 * @itr_idx: interrupt throttling index
1324 * @itr: interrupt throttling value in usecs 1324 * @itr: interrupt throttling value in usecs
1325 */ 1325 */
1326static u32 ice_buildreg_itr(int itr_idx, u16 itr) 1326static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
1327{ 1327{
1328 /* The itr value is reported in microseconds, and the register value is 1328 /* The itr value is reported in microseconds, and the register value is
1329 * recorded in 2 microsecond units. For this reason we only need to 1329 * recorded in 2 microsecond units. For this reason we only need to
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 69625857c482..2c8af98ff640 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -184,21 +184,13 @@ struct ice_ring {
184 u16 next_to_alloc; 184 u16 next_to_alloc;
185} ____cacheline_internodealigned_in_smp; 185} ____cacheline_internodealigned_in_smp;
186 186
187enum ice_latency_range {
188 ICE_LOWEST_LATENCY = 0,
189 ICE_LOW_LATENCY = 1,
190 ICE_BULK_LATENCY = 2,
191 ICE_ULTRA_LATENCY = 3,
192};
193
194struct ice_ring_container { 187struct ice_ring_container {
195 /* head of linked-list of rings */ 188 /* head of linked-list of rings */
196 struct ice_ring *ring; 189 struct ice_ring *ring;
197 unsigned long next_update; /* jiffies value of next queue update */ 190 unsigned long next_update; /* jiffies value of next queue update */
198 unsigned int total_bytes; /* total bytes processed this int */ 191 unsigned int total_bytes; /* total bytes processed this int */
199 unsigned int total_pkts; /* total packets processed this int */ 192 unsigned int total_pkts; /* total packets processed this int */
200 enum ice_latency_range latency_range; 193 u16 itr_idx; /* index in the interrupt vector */
201 int itr_idx; /* index in the interrupt vector */
202 u16 target_itr; /* value in usecs divided by the hw->itr_gran */ 194 u16 target_itr; /* value in usecs divided by the hw->itr_gran */
203 u16 current_itr; /* value in usecs divided by the hw->itr_gran */ 195 u16 current_itr; /* value in usecs divided by the hw->itr_gran */
204 /* high bit set means dynamic ITR, rest is used to store user 196 /* high bit set means dynamic ITR, rest is used to store user