author     Shirley Ma <xma@us.ibm.com>          2006-04-10 12:43:58 -0400
committer  Roland Dreier <rolandd@cisco.com>    2006-04-10 12:43:58 -0400
commit     0f4852513fb07405ce88da40d8c497060561246e
tree       4221d43a7868b0431d7564601017d8f1449cd13d /drivers/infiniband
parent     f2de3b06126ddb07d0e4617225d74dce0855add3
IPoIB: Make send and receive queue sizes tunable
Make IPoIB's send and receive queue sizes tunable via module
parameters ("send_queue_size" and "recv_queue_size"). This allows the
queue sizes to be enlarged to fix disastrously bad performance on some
platforms and workloads, without bloating memory usage when large
queues aren't needed.
Signed-off-by: Shirley Ma <xma@us.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
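For example, both queues could be enlarged at load time like this (the module name ib_ipoib and the value 512 are illustrative, not recommendations; as the init code below shows, any requested value is rounded up to a power of two and clamped to [2, 8192]):

    # Hypothetical example: enlarge both queues to 512 entries.
    # Both parameters are 0444 (read-only), so they can only be set at load time.
    modprobe ib_ipoib send_queue_size=512 recv_queue_size=512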
Diffstat (limited to 'drivers/infiniband')

 -rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h       |  4
 -rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c    | 22
 -rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c  | 28
 -rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c |  6

 4 files changed, 40 insertions(+), 20 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 374109df730..12a1e0572ef 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -65,6 +65,8 @@ enum {
 
 	IPOIB_RX_RING_SIZE = 128,
 	IPOIB_TX_RING_SIZE = 64,
+	IPOIB_MAX_QUEUE_SIZE = 8192,
+	IPOIB_MIN_QUEUE_SIZE = 2,
 
 	IPOIB_NUM_WC = 4,
 
@@ -332,6 +334,8 @@ static inline void ipoib_unregister_debugfs(void) { }
 #define ipoib_warn(priv, format, arg...) \
 	ipoib_printk(KERN_WARNING, priv, format , ## arg)
 
+extern int ipoib_sendq_size;
+extern int ipoib_recvq_size;
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ed65202878d..a54da42849a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i;
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+	for (i = 0; i < ipoib_recvq_size; ++i) {
 		if (ipoib_alloc_rx_skb(dev, i)) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 			return -ENOMEM;
@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
 	if (wr_id & IPOIB_OP_RECV) {
 		wr_id &= ~IPOIB_OP_RECV;
 
-		if (wr_id < IPOIB_RX_RING_SIZE) {
+		if (wr_id < ipoib_recvq_size) {
 			struct sk_buff *skb = priv->rx_ring[wr_id].skb;
 			dma_addr_t addr = priv->rx_ring[wr_id].mapping;
 
@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
 		struct ipoib_tx_buf *tx_req;
 		unsigned long flags;
 
-		if (wr_id >= IPOIB_TX_RING_SIZE) {
+		if (wr_id >= ipoib_sendq_size) {
 			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-				   wr_id, IPOIB_TX_RING_SIZE);
+				   wr_id, ipoib_sendq_size);
 			return;
 		}
 
@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
 		spin_lock_irqsave(&priv->tx_lock, flags);
 		++priv->tx_tail;
 		if (netif_queue_stopped(dev) &&
-		    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
+		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
 			netif_wake_queue(dev);
 		spin_unlock_irqrestore(&priv->tx_lock, flags);
 
@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	 * means we have to make sure everything is properly recorded and
 	 * our state is consistent before we call post_send().
 	 */
-	tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)];
+	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
 	addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
 			      DMA_TO_DEVICE);
 	pci_unmap_addr_set(tx_req, mapping, addr);
 
-	if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
+	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 			       address->ah, qpn, addr, skb->len))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
 
-		if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
+		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
 		}
@@ -488,7 +488,7 @@ static int recvs_pending(struct net_device *dev)
 	int pending = 0;
 	int i;
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (priv->rx_ring[i].skb)
 			++pending;
 
@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
 	 */
 	while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 		tx_req = &priv->tx_ring[priv->tx_tail &
-					(IPOIB_TX_RING_SIZE - 1)];
+					(ipoib_sendq_size - 1)];
 		dma_unmap_single(priv->ca->dma_device,
 				 pci_unmap_addr(tx_req, mapping),
 				 tx_req->skb->len,
@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
 		++priv->tx_tail;
 	}
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (priv->rx_ring[i].skb) {
 			dma_unmap_single(priv->ca->dma_device,
 					 pci_unmap_addr(&priv->rx_ring[i],
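A note on the indexing in the hunks above: ring slots are selected by masking a free-running counter with (queue size - 1), e.g. priv->tx_head & (ipoib_sendq_size - 1). That mask is equivalent to a modulo only when the size is a power of two. A minimal standalone sketch of the idiom (not kernel code):

	/* Sketch: power-of-two ring indexing with a free-running counter.
	 * When ring_size is a power of two (say 64),
	 * head & (ring_size - 1) == head % ring_size,
	 * so head and tail may wrap without special handling.
	 */
	static unsigned int ring_slot(unsigned int head, unsigned int ring_size)
	{
		return head & (ring_size - 1);
	}

This is why ipoib_init_module() below rounds both tunables up to a power of two before they are used.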
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9cb9e430aaa..5bf7e263454 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -41,6 +41,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/kernel.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
 
@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
 
+int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
+int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
+
+module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
+module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;
 
@@ -795,20 +804,19 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	/* Allocate RX/TX "rings" to hold queued skbs */
-
-	priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
+	priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
-		       ca->name, IPOIB_RX_RING_SIZE);
+		       ca->name, ipoib_recvq_size);
 		goto out;
 	}
 
-	priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
+	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
 				GFP_KERNEL);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-		       ca->name, IPOIB_TX_RING_SIZE);
+		       ca->name, ipoib_sendq_size);
 		goto out_rx_ring_cleanup;
 	}
 
@@ -876,7 +884,7 @@ static void ipoib_setup(struct net_device *dev)
 	dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
 	dev->addr_len = INFINIBAND_ALEN;
 	dev->type = ARPHRD_INFINIBAND;
-	dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2;
+	dev->tx_queue_len = ipoib_sendq_size * 2;
 	dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
 
 	/* MTU will be reset when mcast join happens */
@@ -1128,6 +1136,14 @@ static int __init ipoib_init_module(void)
 {
 	int ret;
 
+	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
+	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
+	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
+
+	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
+	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
+	ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+
 	ret = ipoib_register_debugfs();
 	if (ret)
 		return ret;
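The clamping added to ipoib_init_module() establishes the two invariants the rest of the driver depends on: each queue size is a power of two (so the mask-based ring indexing is valid) and lies within [IPOIB_MIN_QUEUE_SIZE, IPOIB_MAX_QUEUE_SIZE]. A standalone sketch of the same normalization, with a hand-rolled stand-in for the kernel's roundup_pow_of_two():

	/* Sketch of the queue-size normalization in ipoib_init_module();
	 * roundup_p2() is a stand-in for the kernel's roundup_pow_of_two(). */
	static int roundup_p2(int n)
	{
		int p = 1;

		while (p < n)
			p <<= 1;
		return p;
	}

	static int normalize_queue_size(int requested)
	{
		int size = roundup_p2(requested);

		if (size > 8192)	/* IPOIB_MAX_QUEUE_SIZE */
			size = 8192;
		if (size < 2)		/* IPOIB_MIN_QUEUE_SIZE */
			size = 2;
		return size;
	}

	/* e.g. normalize_queue_size(100) == 128; normalize_queue_size(100000) == 8192 */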
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5f0388027b2..1d49d1643c5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr init_attr = {
 		.cap = {
-			.max_send_wr = IPOIB_TX_RING_SIZE,
-			.max_recv_wr = IPOIB_RX_RING_SIZE,
+			.max_send_wr = ipoib_sendq_size,
+			.max_recv_wr = ipoib_recvq_size,
 			.max_send_sge = 1,
 			.max_recv_sge = 1
 		},
@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	}
 
 	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
-				IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1);
+				ipoib_sendq_size + ipoib_recvq_size + 1);
 	if (IS_ERR(priv->cq)) {
 		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
 		goto out_free_pd;
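A closing observation on the CQ sizing: sends and receives share one completion queue, so it is created with ipoib_sendq_size + ipoib_recvq_size + 1 entries, enough for a completion per possible outstanding work request. With the default sizes that works out to 64 + 128 + 1 = 193 entries.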