about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sebastian Sanchez <sebastian.sanchez@intel.com> 2016-10-25 16:12:34 -0400
committer: Doug Ledford <dledford@redhat.com> 2016-11-15 16:37:27 -0500
commit: 8af8d2970ed98493a2db88dfcad88b0065e55e79 (patch)
tree: 57841ee0bdf580dc5c23fe6de9a52dfe20cf005d
parent: 2474d775d9e2f935ff6840c8b21b4262afacc821 (diff)
IB/hfi1: Optimize pio_buf and send_context structs
Both pio_buf and send_context structs have oversized fields and have cachelines that can be optimized. Reduce oversized fields for both structs. Make sure pio_buf struct fits within a cacheline. Move read-only fields to their own cacheline in send_context struct. All of this will avoid cacheline trading as the ring progresses and pio buffers/send contexts are used.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c       5
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.h      29
-rw-r--r--  drivers/infiniband/hw/hfi1/pio_copy.c 22
3 files changed, 28 insertions, 28 deletions
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 516fac38d31e..86a7f365b624 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
765 sc->hw_context = hw_context; 765 sc->hw_context = hw_context;
766 cr_group_addresses(sc, &dma); 766 cr_group_addresses(sc, &dma);
767 sc->credits = sci->credits; 767 sc->credits = sci->credits;
768 sc->size = sc->credits * PIO_BLOCK_SIZE;
768 769
769/* PIO Send Memory Address details */ 770/* PIO Send Memory Address details */
770#define PIO_ADDR_CONTEXT_MASK 0xfful 771#define PIO_ADDR_CONTEXT_MASK 0xfful
@@ -1470,9 +1471,7 @@ retry:
1470 1471
1471 /* finish filling in the buffer outside the lock */ 1472 /* finish filling in the buffer outside the lock */
1472 pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; 1473 pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
1473 pbuf->size = sc->credits * PIO_BLOCK_SIZE; 1474 pbuf->end = sc->base_addr + sc->size;
1474 pbuf->end = sc->base_addr + pbuf->size;
1475 pbuf->block_count = blocks;
1476 pbuf->qw_written = 0; 1475 pbuf->qw_written = 0;
1477 pbuf->carry_bytes = 0; 1476 pbuf->carry_bytes = 0;
1478 pbuf->carry.val64 = 0; 1477 pbuf->carry.val64 = 0;
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 498b548055e0..867e5ffc3595 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -83,43 +83,43 @@ struct pio_buf {
83 void *arg; /* argument for cb */ 83 void *arg; /* argument for cb */
84 void __iomem *start; /* buffer start address */ 84 void __iomem *start; /* buffer start address */
85 void __iomem *end; /* context end address */ 85 void __iomem *end; /* context end address */
86 unsigned long size; /* context size, in bytes */
87 unsigned long sent_at; /* buffer is sent when <= free */ 86 unsigned long sent_at; /* buffer is sent when <= free */
88 u32 block_count; /* size of buffer, in blocks */
89 u32 qw_written; /* QW written so far */
90 u32 carry_bytes; /* number of valid bytes in carry */
91 union mix carry; /* pending unwritten bytes */ 87 union mix carry; /* pending unwritten bytes */
88 u16 qw_written; /* QW written so far */
89 u8 carry_bytes; /* number of valid bytes in carry */
92}; 90};
93 91
94/* cache line aligned pio buffer array */ 92/* cache line aligned pio buffer array */
95union pio_shadow_ring { 93union pio_shadow_ring {
96 struct pio_buf pbuf; 94 struct pio_buf pbuf;
97 u64 unused[16]; /* cache line spacer */
98} ____cacheline_aligned; 95} ____cacheline_aligned;
99 96
100/* per-NUMA send context */ 97/* per-NUMA send context */
101struct send_context { 98struct send_context {
102 /* read-only after init */ 99 /* read-only after init */
103 struct hfi1_devdata *dd; /* device */ 100 struct hfi1_devdata *dd; /* device */
104 void __iomem *base_addr; /* start of PIO memory */
105 union pio_shadow_ring *sr; /* shadow ring */ 101 union pio_shadow_ring *sr; /* shadow ring */
102 void __iomem *base_addr; /* start of PIO memory */
103 u32 __percpu *buffers_allocated;/* count of buffers allocated */
104 u32 size; /* context size, in bytes */
106 105
107 struct work_struct halt_work; /* halted context work queue entry */
108 unsigned long flags; /* flags */
109 int node; /* context home node */ 106 int node; /* context home node */
110 int type; /* context type */
111 u32 sw_index; /* software index number */
112 u32 hw_context; /* hardware context number */
113 u32 credits; /* number of blocks in context */
114 u32 sr_size; /* size of the shadow ring */ 107 u32 sr_size; /* size of the shadow ring */
115 u32 group; /* credit return group */ 108 u16 flags; /* flags */
109 u8 type; /* context type */
110 u8 sw_index; /* software index number */
111 u8 hw_context; /* hardware context number */
112 u8 group; /* credit return group */
113
116 /* allocator fields */ 114 /* allocator fields */
117 spinlock_t alloc_lock ____cacheline_aligned_in_smp; 115 spinlock_t alloc_lock ____cacheline_aligned_in_smp;
118 u32 sr_head; /* shadow ring head */ 116 u32 sr_head; /* shadow ring head */
119 unsigned long fill; /* official alloc count */ 117 unsigned long fill; /* official alloc count */
120 unsigned long alloc_free; /* copy of free (less cache thrash) */ 118 unsigned long alloc_free; /* copy of free (less cache thrash) */
121 u32 __percpu *buffers_allocated;/* count of buffers allocated */
122 u32 fill_wrap; /* tracks fill within ring */ 119 u32 fill_wrap; /* tracks fill within ring */
120 u32 credits; /* number of blocks in context */
121 /* adding a new field here would make it part of this cacheline */
122
123 /* releaser fields */ 123 /* releaser fields */
124 spinlock_t release_lock ____cacheline_aligned_in_smp; 124 spinlock_t release_lock ____cacheline_aligned_in_smp;
125 u32 sr_tail; /* shadow ring tail */ 125 u32 sr_tail; /* shadow ring tail */
@@ -131,6 +131,7 @@ struct send_context {
131 u32 credit_intr_count; /* count of credit intr users */ 131 u32 credit_intr_count; /* count of credit intr users */
132 u64 credit_ctrl; /* cache for credit control */ 132 u64 credit_ctrl; /* cache for credit control */
133 wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ 133 wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
134 struct work_struct halt_work; /* halted context work queue entry */
134}; 135};
135 136
136/* send context flags */ 137/* send context flags */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index aa7773643107..03024cec78dd 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
129 dest += sizeof(u64); 129 dest += sizeof(u64);
130 } 130 }
131 131
132 dest -= pbuf->size; 132 dest -= pbuf->sc->size;
133 dend -= pbuf->size; 133 dend -= pbuf->sc->size;
134 } 134 }
135 135
136 /* write 8-byte non-SOP, non-wrap chunk data */ 136 /* write 8-byte non-SOP, non-wrap chunk data */
@@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
361 dest += sizeof(u64); 361 dest += sizeof(u64);
362 } 362 }
363 363
364 dest -= pbuf->size; 364 dest -= pbuf->sc->size;
365 dend -= pbuf->size; 365 dend -= pbuf->sc->size;
366 } 366 }
367 367
368 /* write 8-byte non-SOP, non-wrap chunk data */ 368 /* write 8-byte non-SOP, non-wrap chunk data */
@@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
458 dest += sizeof(u64); 458 dest += sizeof(u64);
459 } 459 }
460 460
461 dest -= pbuf->size; 461 dest -= pbuf->sc->size;
462 dend -= pbuf->size; 462 dend -= pbuf->sc->size;
463 } 463 }
464 464
465 /* write 8-byte non-SOP, non-wrap chunk data */ 465 /* write 8-byte non-SOP, non-wrap chunk data */
@@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
492 */ 492 */
493 /* adjust if we have wrapped */ 493 /* adjust if we have wrapped */
494 if (dest >= pbuf->end) 494 if (dest >= pbuf->end)
495 dest -= pbuf->size; 495 dest -= pbuf->sc->size;
496 /* jump to the SOP range if within the first block */ 496 /* jump to the SOP range if within the first block */
497 else if (pbuf->qw_written < PIO_BLOCK_QWS) 497 else if (pbuf->qw_written < PIO_BLOCK_QWS)
498 dest += SOP_DISTANCE; 498 dest += SOP_DISTANCE;
@@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf,
584 dest += sizeof(u64); 584 dest += sizeof(u64);
585 } 585 }
586 586
587 dest -= pbuf->size; 587 dest -= pbuf->sc->size;
588 dend -= pbuf->size; 588 dend -= pbuf->sc->size;
589 } 589 }
590 590
591 /* write 8-byte non-SOP, non-wrap chunk data */ 591 /* write 8-byte non-SOP, non-wrap chunk data */
@@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
666 */ 666 */
667 /* adjust if we've wrapped */ 667 /* adjust if we've wrapped */
668 if (dest >= pbuf->end) 668 if (dest >= pbuf->end)
669 dest -= pbuf->size; 669 dest -= pbuf->sc->size;
670 /* jump to SOP range if within the first block */ 670 /* jump to SOP range if within the first block */
671 else if (pbuf->qw_written < PIO_BLOCK_QWS) 671 else if (pbuf->qw_written < PIO_BLOCK_QWS)
672 dest += SOP_DISTANCE; 672 dest += SOP_DISTANCE;
@@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf)
719 */ 719 */
720 /* adjust if we have wrapped */ 720 /* adjust if we have wrapped */
721 if (dest >= pbuf->end) 721 if (dest >= pbuf->end)
722 dest -= pbuf->size; 722 dest -= pbuf->sc->size;
723 /* jump to the SOP range if within the first block */ 723 /* jump to the SOP range if within the first block */
724 else if (pbuf->qw_written < PIO_BLOCK_QWS) 724 else if (pbuf->qw_written < PIO_BLOCK_QWS)
725 dest += SOP_DISTANCE; 725 dest += SOP_DISTANCE;