| field | value | date |
|---|---|---|
| author | Shirley Ma <xma@us.ibm.com> | 2006-04-10 12:43:58 -0400 |
| committer | Roland Dreier <rolandd@cisco.com> | 2006-04-10 12:43:58 -0400 |
| commit | 0f4852513fb07405ce88da40d8c497060561246e | |
| tree | 4221d43a7868b0431d7564601017d8f1449cd13d | |
| parent | f2de3b06126ddb07d0e4617225d74dce0855add3 | |
IPoIB: Make send and receive queue sizes tunable
Make IPoIB's send and receive queue sizes tunable via module
parameters ("send_queue_size" and "recv_queue_size"). This allows the
queue sizes to be enlarged to fix disastrously bad performance on some
platforms and workloads, without bloating memory usage when large
queues aren't needed.
Signed-off-by: Shirley Ma <xma@us.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
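
The defaults keep the existing ring sizes (send_queue_size=64, recv_queue_size=128); larger values can be requested at load time, for example `modprobe ib_ipoib send_queue_size=512 recv_queue_size=512` (the values here are purely illustrative). Because the parameters are registered with mode 0444, the effective (rounded and clamped) values can be read back, but not changed, under `/sys/module/ib_ipoib/parameters/` once the module is loaded.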
Diffstat (limited to 'drivers')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 4 |
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ib.c | 22 |
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 28 |
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 6 |

4 files changed, 40 insertions(+), 20 deletions(-)
```diff
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 374109df7303..12a1e0572ef2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -65,6 +65,8 @@ enum {
 
         IPOIB_RX_RING_SIZE = 128,
         IPOIB_TX_RING_SIZE = 64,
+        IPOIB_MAX_QUEUE_SIZE = 8192,
+        IPOIB_MIN_QUEUE_SIZE = 2,
 
         IPOIB_NUM_WC = 4,
 
@@ -332,6 +334,8 @@ static inline void ipoib_unregister_debugfs(void) { }
 #define ipoib_warn(priv, format, arg...) \
         ipoib_printk(KERN_WARNING, priv, format , ## arg)
 
+extern int ipoib_sendq_size;
+extern int ipoib_recvq_size;
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
```
```diff
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ed65202878d8..a54da42849ae 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         int i;
 
-        for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+        for (i = 0; i < ipoib_recvq_size; ++i) {
                 if (ipoib_alloc_rx_skb(dev, i)) {
                         ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
                         return -ENOMEM;
@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
         if (wr_id & IPOIB_OP_RECV) {
                 wr_id &= ~IPOIB_OP_RECV;
 
-                if (wr_id < IPOIB_RX_RING_SIZE) {
+                if (wr_id < ipoib_recvq_size) {
                         struct sk_buff *skb = priv->rx_ring[wr_id].skb;
                         dma_addr_t addr = priv->rx_ring[wr_id].mapping;
 
@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
                 struct ipoib_tx_buf *tx_req;
                 unsigned long flags;
 
-                if (wr_id >= IPOIB_TX_RING_SIZE) {
+                if (wr_id >= ipoib_sendq_size) {
                         ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-                                   wr_id, IPOIB_TX_RING_SIZE);
+                                   wr_id, ipoib_sendq_size);
                         return;
                 }
 
@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
                 spin_lock_irqsave(&priv->tx_lock, flags);
                 ++priv->tx_tail;
                 if (netif_queue_stopped(dev) &&
-                    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
+                    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
                         netif_wake_queue(dev);
                 spin_unlock_irqrestore(&priv->tx_lock, flags);
 
@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
          * means we have to make sure everything is properly recorded and
          * our state is consistent before we call post_send().
          */
-        tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)];
+        tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
         tx_req->skb = skb;
         addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
                               DMA_TO_DEVICE);
         pci_unmap_addr_set(tx_req, mapping, addr);
 
-        if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
+        if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
                                address->ah, qpn, addr, skb->len))) {
                 ipoib_warn(priv, "post_send failed\n");
                 ++priv->stats.tx_errors;
@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                 address->last_send = priv->tx_head;
                 ++priv->tx_head;
 
-                if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
+                if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
                         ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
                         netif_stop_queue(dev);
                 }
@@ -488,7 +488,7 @@ static int recvs_pending(struct net_device *dev)
         int pending = 0;
         int i;
 
-        for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+        for (i = 0; i < ipoib_recvq_size; ++i)
                 if (priv->rx_ring[i].skb)
                         ++pending;
 
@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
          */
         while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
                 tx_req = &priv->tx_ring[priv->tx_tail &
-                                        (IPOIB_TX_RING_SIZE - 1)];
+                                        (ipoib_sendq_size - 1)];
                 dma_unmap_single(priv->ca->dma_device,
                                  pci_unmap_addr(tx_req, mapping),
                                  tx_req->skb->len,
@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
                 ++priv->tx_tail;
         }
 
-        for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+        for (i = 0; i < ipoib_recvq_size; ++i)
                 if (priv->rx_ring[i].skb) {
                         dma_unmap_single(priv->ca->dma_device,
                                          pci_unmap_addr(&priv->rx_ring[i],
```
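
Two details in the ipoib_ib.c changes are worth noting. The TX ring is indexed with `priv->tx_head & (ipoib_sendq_size - 1)`, a masking trick that is only a valid modulo when the queue size is a power of two; the module-init code in ipoib_main.c below enforces that. The flow-control policy is unchanged: the net queue is stopped once `tx_head - tx_tail` reaches `ipoib_sendq_size` and woken again when the backlog drops to half the queue (`ipoib_sendq_size >> 1`, equivalent to the old `IPOIB_TX_RING_SIZE / 2`).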
```diff
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9cb9e430aaaf..5bf7e263454b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -41,6 +41,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/kernel.h>
 
 #include <linux/if_arp.h>       /* For ARPHRD_xxx */
 
@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
 
+int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
+int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
+
+module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
+module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;
 
@@ -795,20 +804,19 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
         struct ipoib_dev_priv *priv = netdev_priv(dev);
 
         /* Allocate RX/TX "rings" to hold queued skbs */
-
-        priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
+        priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                 GFP_KERNEL);
         if (!priv->rx_ring) {
                 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
-                       ca->name, IPOIB_RX_RING_SIZE);
+                       ca->name, ipoib_recvq_size);
                 goto out;
         }
 
-        priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
+        priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
                                 GFP_KERNEL);
         if (!priv->tx_ring) {
                 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-                       ca->name, IPOIB_TX_RING_SIZE);
+                       ca->name, ipoib_sendq_size);
                 goto out_rx_ring_cleanup;
         }
 
@@ -876,7 +884,7 @@ static void ipoib_setup(struct net_device *dev)
         dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
         dev->addr_len = INFINIBAND_ALEN;
         dev->type = ARPHRD_INFINIBAND;
-        dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2;
+        dev->tx_queue_len = ipoib_sendq_size * 2;
         dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
 
         /* MTU will be reset when mcast join happens */
@@ -1128,6 +1136,14 @@ static int __init ipoib_init_module(void)
 {
         int ret;
 
+        ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
+        ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
+        ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
+
+        ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
+        ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
+        ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+
         ret = ipoib_register_debugfs();
         if (ret)
                 return ret;
```
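
The clamping added to `ipoib_init_module()` is what makes the ring masking above safe: whatever value the user passes is rounded up to the next power of two and then bounded to [`IPOIB_MIN_QUEUE_SIZE`, `IPOIB_MAX_QUEUE_SIZE`]. A minimal standalone sketch of that arithmetic (plain userspace C, not driver code; `round_up_pow2()` is only a stand-in for the kernel's `roundup_pow_of_two()`):

```c
#include <stdio.h>

#define IPOIB_MAX_QUEUE_SIZE 8192
#define IPOIB_MIN_QUEUE_SIZE 2

/* Userspace stand-in for the kernel's roundup_pow_of_two(). */
static unsigned int round_up_pow2(unsigned int n)
{
        unsigned int p = 1;

        while (p < n)
                p <<= 1;
        return p;
}

int main(void)
{
        int requested[] = { 1, 100, 500, 9000 };
        int i;

        for (i = 0; i < 4; ++i) {
                unsigned int q = round_up_pow2((unsigned int) requested[i]);

                /* Same order as the driver: round up, cap at the max, raise to the min. */
                if (q > IPOIB_MAX_QUEUE_SIZE)
                        q = IPOIB_MAX_QUEUE_SIZE;
                if (q < IPOIB_MIN_QUEUE_SIZE)
                        q = IPOIB_MIN_QUEUE_SIZE;

                printf("%d -> %u\n", requested[i], q); /* 1->2, 100->128, 500->512, 9000->8192 */
        }
        return 0;
}
```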
```diff
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5f0388027b25..1d49d1643c59 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ib_qp_init_attr init_attr = {
                 .cap = {
-                        .max_send_wr = IPOIB_TX_RING_SIZE,
-                        .max_recv_wr = IPOIB_RX_RING_SIZE,
+                        .max_send_wr = ipoib_sendq_size,
+                        .max_recv_wr = ipoib_recvq_size,
                         .max_send_sge = 1,
                         .max_recv_sge = 1
                 },
@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
         }
 
         priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
-                                IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1);
+                                ipoib_sendq_size + ipoib_recvq_size + 1);
         if (IS_ERR(priv->cq)) {
                 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
                 goto out_free_pd;
```
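
Finally, note the CQ sizing in ipoib_verbs.c: a single completion queue covers both directions, so it is created with `ipoib_sendq_size + ipoib_recvq_size + 1` entries. With the defaults that is 64 + 128 + 1 = 193 entries; with both queues pushed to the 8192 maximum it would be 16385.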
