diff options
author | Sebastian Sanchez <sebastian.sanchez@intel.com> | 2016-10-25 16:12:28 -0400 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-11-15 16:37:27 -0500 |
commit | 2474d775d9e2f935ff6840c8b21b4262afacc821 (patch) | |
tree | 488fd7bff5f3edf1eafec2434ff6b3564cb1202c | |
parent | fe4d924396a861937256293ff4a84b76b84854d8 (diff) |
IB/hfi1: Get rid of divide in pio buffer allocator
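The div instruction shows up as costly in profiles. Replace the modulo on the fill count with a separate fill_wrap index that is advanced alongside fill and wrapped with a compare-and-subtract against the credit count.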
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.c | 11 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.h | 1 |
2 files changed, 8 insertions, 4 deletions
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 385e4dcf2cd3..516fac38d31e 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1249,6 +1249,7 @@ int sc_enable(struct send_context *sc) | |||
1249 | sc->free = 0; | 1249 | sc->free = 0; |
1250 | sc->alloc_free = 0; | 1250 | sc->alloc_free = 0; |
1251 | sc->fill = 0; | 1251 | sc->fill = 0; |
1252 | sc->fill_wrap = 0; | ||
1252 | sc->sr_head = 0; | 1253 | sc->sr_head = 0; |
1253 | sc->sr_tail = 0; | 1254 | sc->sr_tail = 0; |
1254 | sc->flags = 0; | 1255 | sc->flags = 0; |
@@ -1392,7 +1393,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, | |||
1392 | unsigned long flags; | 1393 | unsigned long flags; |
1393 | unsigned long avail; | 1394 | unsigned long avail; |
1394 | unsigned long blocks = dwords_to_blocks(dw_len); | 1395 | unsigned long blocks = dwords_to_blocks(dw_len); |
1395 | unsigned long start_fill; | 1396 | u32 fill_wrap; |
1396 | int trycount = 0; | 1397 | int trycount = 0; |
1397 | u32 head, next; | 1398 | u32 head, next; |
1398 | 1399 | ||
@@ -1435,8 +1436,11 @@ retry: | |||
1435 | head = sc->sr_head; | 1436 | head = sc->sr_head; |
1436 | 1437 | ||
1437 | /* "allocate" the buffer */ | 1438 | /* "allocate" the buffer */ |
1438 | start_fill = sc->fill; | ||
1439 | sc->fill += blocks; | 1439 | sc->fill += blocks; |
1440 | fill_wrap = sc->fill_wrap; | ||
1441 | sc->fill_wrap += blocks; | ||
1442 | if (sc->fill_wrap >= sc->credits) | ||
1443 | sc->fill_wrap = sc->fill_wrap - sc->credits; | ||
1440 | 1444 | ||
1441 | /* | 1445 | /* |
1442 | * Fill the parts that the releaser looks at before moving the head. | 1446 | * Fill the parts that the releaser looks at before moving the head. |
@@ -1465,8 +1469,7 @@ retry: | |||
1465 | spin_unlock_irqrestore(&sc->alloc_lock, flags); | 1469 | spin_unlock_irqrestore(&sc->alloc_lock, flags); |
1466 | 1470 | ||
1467 | /* finish filling in the buffer outside the lock */ | 1471 | /* finish filling in the buffer outside the lock */ |
1468 | pbuf->start = sc->base_addr + ((start_fill % sc->credits) | 1472 | pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; |
1469 | * PIO_BLOCK_SIZE); | ||
1470 | pbuf->size = sc->credits * PIO_BLOCK_SIZE; | 1473 | pbuf->size = sc->credits * PIO_BLOCK_SIZE; |
1471 | pbuf->end = sc->base_addr + pbuf->size; | 1474 | pbuf->end = sc->base_addr + pbuf->size; |
1472 | pbuf->block_count = blocks; | 1475 | pbuf->block_count = blocks; |
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index bd19507b6bb0..498b548055e0 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -119,6 +119,7 @@ struct send_context { | |||
119 | unsigned long fill; /* official alloc count */ | 119 | unsigned long fill; /* official alloc count */ |
120 | unsigned long alloc_free; /* copy of free (less cache thrash) */ | 120 | unsigned long alloc_free; /* copy of free (less cache thrash) */ |
121 | u32 __percpu *buffers_allocated;/* count of buffers allocated */ | 121 | u32 __percpu *buffers_allocated;/* count of buffers allocated */ |
122 | u32 fill_wrap; /* tracks fill within ring */ | ||
122 | /* releaser fields */ | 123 | /* releaser fields */ |
123 | spinlock_t release_lock ____cacheline_aligned_in_smp; | 124 | spinlock_t release_lock ____cacheline_aligned_in_smp; |
124 | u32 sr_tail; /* shadow ring tail */ | 125 | u32 sr_tail; /* shadow ring tail */ |