diff options
author | Sebastian Sanchez <sebastian.sanchez@intel.com> | 2016-10-25 16:12:34 -0400 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-11-15 16:37:27 -0500 |
commit | 8af8d2970ed98493a2db88dfcad88b0065e55e79 (patch) | |
tree | 57841ee0bdf580dc5c23fe6de9a52dfe20cf005d | |
parent | 2474d775d9e2f935ff6840c8b21b4262afacc821 (diff) |
IB/hfi1: Optimize pio_buf and send_context structs
The pio_buf and send_context structs both have oversized
fields and cacheline layouts that can be optimized.
Reduce oversized fields for both structs.
Make sure pio_buf struct fits within a cacheline.
Move read-only fields to their own cacheline in
send_context struct.
Together, these changes avoid cacheline trading as the ring
progresses and as pio buffers and send contexts are used.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.h | 29 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio_copy.c | 22 |
3 files changed, 28 insertions, 28 deletions
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 516fac38d31e..86a7f365b624 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c | |||
@@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, | |||
765 | sc->hw_context = hw_context; | 765 | sc->hw_context = hw_context; |
766 | cr_group_addresses(sc, &dma); | 766 | cr_group_addresses(sc, &dma); |
767 | sc->credits = sci->credits; | 767 | sc->credits = sci->credits; |
768 | sc->size = sc->credits * PIO_BLOCK_SIZE; | ||
768 | 769 | ||
769 | /* PIO Send Memory Address details */ | 770 | /* PIO Send Memory Address details */ |
770 | #define PIO_ADDR_CONTEXT_MASK 0xfful | 771 | #define PIO_ADDR_CONTEXT_MASK 0xfful |
@@ -1470,9 +1471,7 @@ retry: | |||
1470 | 1471 | ||
1471 | /* finish filling in the buffer outside the lock */ | 1472 | /* finish filling in the buffer outside the lock */ |
1472 | pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; | 1473 | pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; |
1473 | pbuf->size = sc->credits * PIO_BLOCK_SIZE; | 1474 | pbuf->end = sc->base_addr + sc->size; |
1474 | pbuf->end = sc->base_addr + pbuf->size; | ||
1475 | pbuf->block_count = blocks; | ||
1476 | pbuf->qw_written = 0; | 1475 | pbuf->qw_written = 0; |
1477 | pbuf->carry_bytes = 0; | 1476 | pbuf->carry_bytes = 0; |
1478 | pbuf->carry.val64 = 0; | 1477 | pbuf->carry.val64 = 0; |
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 498b548055e0..867e5ffc3595 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h | |||
@@ -83,43 +83,43 @@ struct pio_buf { | |||
83 | void *arg; /* argument for cb */ | 83 | void *arg; /* argument for cb */ |
84 | void __iomem *start; /* buffer start address */ | 84 | void __iomem *start; /* buffer start address */ |
85 | void __iomem *end; /* context end address */ | 85 | void __iomem *end; /* context end address */ |
86 | unsigned long size; /* context size, in bytes */ | ||
87 | unsigned long sent_at; /* buffer is sent when <= free */ | 86 | unsigned long sent_at; /* buffer is sent when <= free */ |
88 | u32 block_count; /* size of buffer, in blocks */ | ||
89 | u32 qw_written; /* QW written so far */ | ||
90 | u32 carry_bytes; /* number of valid bytes in carry */ | ||
91 | union mix carry; /* pending unwritten bytes */ | 87 | union mix carry; /* pending unwritten bytes */ |
88 | u16 qw_written; /* QW written so far */ | ||
89 | u8 carry_bytes; /* number of valid bytes in carry */ | ||
92 | }; | 90 | }; |
93 | 91 | ||
94 | /* cache line aligned pio buffer array */ | 92 | /* cache line aligned pio buffer array */ |
95 | union pio_shadow_ring { | 93 | union pio_shadow_ring { |
96 | struct pio_buf pbuf; | 94 | struct pio_buf pbuf; |
97 | u64 unused[16]; /* cache line spacer */ | ||
98 | } ____cacheline_aligned; | 95 | } ____cacheline_aligned; |
99 | 96 | ||
100 | /* per-NUMA send context */ | 97 | /* per-NUMA send context */ |
101 | struct send_context { | 98 | struct send_context { |
102 | /* read-only after init */ | 99 | /* read-only after init */ |
103 | struct hfi1_devdata *dd; /* device */ | 100 | struct hfi1_devdata *dd; /* device */ |
104 | void __iomem *base_addr; /* start of PIO memory */ | ||
105 | union pio_shadow_ring *sr; /* shadow ring */ | 101 | union pio_shadow_ring *sr; /* shadow ring */ |
102 | void __iomem *base_addr; /* start of PIO memory */ | ||
103 | u32 __percpu *buffers_allocated;/* count of buffers allocated */ | ||
104 | u32 size; /* context size, in bytes */ | ||
106 | 105 | ||
107 | struct work_struct halt_work; /* halted context work queue entry */ | ||
108 | unsigned long flags; /* flags */ | ||
109 | int node; /* context home node */ | 106 | int node; /* context home node */ |
110 | int type; /* context type */ | ||
111 | u32 sw_index; /* software index number */ | ||
112 | u32 hw_context; /* hardware context number */ | ||
113 | u32 credits; /* number of blocks in context */ | ||
114 | u32 sr_size; /* size of the shadow ring */ | 107 | u32 sr_size; /* size of the shadow ring */ |
115 | u32 group; /* credit return group */ | 108 | u16 flags; /* flags */ |
109 | u8 type; /* context type */ | ||
110 | u8 sw_index; /* software index number */ | ||
111 | u8 hw_context; /* hardware context number */ | ||
112 | u8 group; /* credit return group */ | ||
113 | |||
116 | /* allocator fields */ | 114 | /* allocator fields */ |
117 | spinlock_t alloc_lock ____cacheline_aligned_in_smp; | 115 | spinlock_t alloc_lock ____cacheline_aligned_in_smp; |
118 | u32 sr_head; /* shadow ring head */ | 116 | u32 sr_head; /* shadow ring head */ |
119 | unsigned long fill; /* official alloc count */ | 117 | unsigned long fill; /* official alloc count */ |
120 | unsigned long alloc_free; /* copy of free (less cache thrash) */ | 118 | unsigned long alloc_free; /* copy of free (less cache thrash) */ |
121 | u32 __percpu *buffers_allocated;/* count of buffers allocated */ | ||
122 | u32 fill_wrap; /* tracks fill within ring */ | 119 | u32 fill_wrap; /* tracks fill within ring */ |
120 | u32 credits; /* number of blocks in context */ | ||
121 | /* adding a new field here would make it part of this cacheline */ | ||
122 | |||
123 | /* releaser fields */ | 123 | /* releaser fields */ |
124 | spinlock_t release_lock ____cacheline_aligned_in_smp; | 124 | spinlock_t release_lock ____cacheline_aligned_in_smp; |
125 | u32 sr_tail; /* shadow ring tail */ | 125 | u32 sr_tail; /* shadow ring tail */ |
@@ -131,6 +131,7 @@ struct send_context { | |||
131 | u32 credit_intr_count; /* count of credit intr users */ | 131 | u32 credit_intr_count; /* count of credit intr users */ |
132 | u64 credit_ctrl; /* cache for credit control */ | 132 | u64 credit_ctrl; /* cache for credit control */ |
133 | wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ | 133 | wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ |
134 | struct work_struct halt_work; /* halted context work queue entry */ | ||
134 | }; | 135 | }; |
135 | 136 | ||
136 | /* send context flags */ | 137 | /* send context flags */ |
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index aa7773643107..03024cec78dd 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c | |||
@@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, | |||
129 | dest += sizeof(u64); | 129 | dest += sizeof(u64); |
130 | } | 130 | } |
131 | 131 | ||
132 | dest -= pbuf->size; | 132 | dest -= pbuf->sc->size; |
133 | dend -= pbuf->size; | 133 | dend -= pbuf->sc->size; |
134 | } | 134 | } |
135 | 135 | ||
136 | /* write 8-byte non-SOP, non-wrap chunk data */ | 136 | /* write 8-byte non-SOP, non-wrap chunk data */ |
@@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, | |||
361 | dest += sizeof(u64); | 361 | dest += sizeof(u64); |
362 | } | 362 | } |
363 | 363 | ||
364 | dest -= pbuf->size; | 364 | dest -= pbuf->sc->size; |
365 | dend -= pbuf->size; | 365 | dend -= pbuf->sc->size; |
366 | } | 366 | } |
367 | 367 | ||
368 | /* write 8-byte non-SOP, non-wrap chunk data */ | 368 | /* write 8-byte non-SOP, non-wrap chunk data */ |
@@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) | |||
458 | dest += sizeof(u64); | 458 | dest += sizeof(u64); |
459 | } | 459 | } |
460 | 460 | ||
461 | dest -= pbuf->size; | 461 | dest -= pbuf->sc->size; |
462 | dend -= pbuf->size; | 462 | dend -= pbuf->sc->size; |
463 | } | 463 | } |
464 | 464 | ||
465 | /* write 8-byte non-SOP, non-wrap chunk data */ | 465 | /* write 8-byte non-SOP, non-wrap chunk data */ |
@@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) | |||
492 | */ | 492 | */ |
493 | /* adjust if we have wrapped */ | 493 | /* adjust if we have wrapped */ |
494 | if (dest >= pbuf->end) | 494 | if (dest >= pbuf->end) |
495 | dest -= pbuf->size; | 495 | dest -= pbuf->sc->size; |
496 | /* jump to the SOP range if within the first block */ | 496 | /* jump to the SOP range if within the first block */ |
497 | else if (pbuf->qw_written < PIO_BLOCK_QWS) | 497 | else if (pbuf->qw_written < PIO_BLOCK_QWS) |
498 | dest += SOP_DISTANCE; | 498 | dest += SOP_DISTANCE; |
@@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf, | |||
584 | dest += sizeof(u64); | 584 | dest += sizeof(u64); |
585 | } | 585 | } |
586 | 586 | ||
587 | dest -= pbuf->size; | 587 | dest -= pbuf->sc->size; |
588 | dend -= pbuf->size; | 588 | dend -= pbuf->sc->size; |
589 | } | 589 | } |
590 | 590 | ||
591 | /* write 8-byte non-SOP, non-wrap chunk data */ | 591 | /* write 8-byte non-SOP, non-wrap chunk data */ |
@@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) | |||
666 | */ | 666 | */ |
667 | /* adjust if we've wrapped */ | 667 | /* adjust if we've wrapped */ |
668 | if (dest >= pbuf->end) | 668 | if (dest >= pbuf->end) |
669 | dest -= pbuf->size; | 669 | dest -= pbuf->sc->size; |
670 | /* jump to SOP range if within the first block */ | 670 | /* jump to SOP range if within the first block */ |
671 | else if (pbuf->qw_written < PIO_BLOCK_QWS) | 671 | else if (pbuf->qw_written < PIO_BLOCK_QWS) |
672 | dest += SOP_DISTANCE; | 672 | dest += SOP_DISTANCE; |
@@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf) | |||
719 | */ | 719 | */ |
720 | /* adjust if we have wrapped */ | 720 | /* adjust if we have wrapped */ |
721 | if (dest >= pbuf->end) | 721 | if (dest >= pbuf->end) |
722 | dest -= pbuf->size; | 722 | dest -= pbuf->sc->size; |
723 | /* jump to the SOP range if within the first block */ | 723 | /* jump to the SOP range if within the first block */ |
724 | else if (pbuf->qw_written < PIO_BLOCK_QWS) | 724 | else if (pbuf->qw_written < PIO_BLOCK_QWS) |
725 | dest += SOP_DISTANCE; | 725 | dest += SOP_DISTANCE; |