diff options
author | Ananda Raju <Ananda.Raju@neterion.com> | 2006-04-21 19:03:13 -0400 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2006-05-02 15:16:35 -0400 |
commit | 863c11a91e4507c3ff44783a75a5433c8cf7700e (patch) | |
tree | f1a3e065ed6787c693b8b56418c571dc950bdb88 /drivers/net/s2io.c | |
parent | 1fb5fef9b80d9a3b5368e22031627afd1585487b (diff) |
[PATCH] s2io: performance improvements
Hi,
This patch contains all the changes that were made to improve the
performance of the s2io driver. One-line descriptions of the changes
are:
1. For the non-NAPI case, the rx interrupt handler is now called
unconditionally
2. Code optimization, and added a prefetch of skb->data
3. Removed modulo operations in the fast path
4. Enabled Group Reads and set the backoff interval to 0x1000
5. Corrected the PIC_CNTL_SHARED_SPLITS macro definition, and reduced
the pause parameter
6. Corrected logic of identifying rx buffer level in rx_buffer_level()
7. Fixed DMA map and unmap being done with different sizes in 1-buffer mode
8. Removed forcible disabling of ERO
9. Send up to the stack packets with transfer code 0x5
Signed-off-by: Ananda Raju <ananda.raju@neterion.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
Diffstat (limited to 'drivers/net/s2io.c')
-rw-r--r-- | drivers/net/s2io.c | 140 |
1 files changed, 74 insertions, 66 deletions
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 79208f434ac1..846873159662 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c | |||
@@ -106,18 +106,14 @@ static inline int RXD_IS_UP2DT(RxD_t *rxdp) | |||
106 | #define LOW 2 | 106 | #define LOW 2 |
107 | static inline int rx_buffer_level(nic_t * sp, int rxb_size, int ring) | 107 | static inline int rx_buffer_level(nic_t * sp, int rxb_size, int ring) |
108 | { | 108 | { |
109 | int level = 0; | ||
110 | mac_info_t *mac_control; | 109 | mac_info_t *mac_control; |
111 | 110 | ||
112 | mac_control = &sp->mac_control; | 111 | mac_control = &sp->mac_control; |
113 | if ((mac_control->rings[ring].pkt_cnt - rxb_size) > 16) { | 112 | if (rxb_size <= rxd_count[sp->rxd_mode]) |
114 | level = LOW; | 113 | return PANIC; |
115 | if (rxb_size <= rxd_count[sp->rxd_mode]) { | 114 | else if ((mac_control->rings[ring].pkt_cnt - rxb_size) > 16) |
116 | level = PANIC; | 115 | return LOW; |
117 | } | 116 | return 0; |
118 | } | ||
119 | |||
120 | return level; | ||
121 | } | 117 | } |
122 | 118 | ||
123 | /* Ethtool related variables and Macros. */ | 119 | /* Ethtool related variables and Macros. */ |
@@ -311,7 +307,7 @@ static unsigned int rts_frm_len[MAX_RX_RINGS] = | |||
311 | {[0 ...(MAX_RX_RINGS - 1)] = 0 }; | 307 | {[0 ...(MAX_RX_RINGS - 1)] = 0 }; |
312 | static unsigned int rx_ring_mode = 1; | 308 | static unsigned int rx_ring_mode = 1; |
313 | static unsigned int use_continuous_tx_intrs = 1; | 309 | static unsigned int use_continuous_tx_intrs = 1; |
314 | static unsigned int rmac_pause_time = 65535; | 310 | static unsigned int rmac_pause_time = 0x100; |
315 | static unsigned int mc_pause_threshold_q0q3 = 187; | 311 | static unsigned int mc_pause_threshold_q0q3 = 187; |
316 | static unsigned int mc_pause_threshold_q4q7 = 187; | 312 | static unsigned int mc_pause_threshold_q4q7 = 187; |
317 | static unsigned int shared_splits; | 313 | static unsigned int shared_splits; |
@@ -1545,13 +1541,22 @@ static int init_nic(struct s2io_nic *nic) | |||
1545 | val64 |= PIC_CNTL_SHARED_SPLITS(shared_splits); | 1541 | val64 |= PIC_CNTL_SHARED_SPLITS(shared_splits); |
1546 | writeq(val64, &bar0->pic_control); | 1542 | writeq(val64, &bar0->pic_control); |
1547 | 1543 | ||
1544 | if (nic->config.bus_speed == 266) { | ||
1545 | writeq(TXREQTO_VAL(0x7f) | TXREQTO_EN, &bar0->txreqtimeout); | ||
1546 | writeq(0x0, &bar0->read_retry_delay); | ||
1547 | writeq(0x0, &bar0->write_retry_delay); | ||
1548 | } | ||
1549 | |||
1548 | /* | 1550 | /* |
1549 | * Programming the Herc to split every write transaction | 1551 | * Programming the Herc to split every write transaction |
1550 | * that does not start on an ADB to reduce disconnects. | 1552 | * that does not start on an ADB to reduce disconnects. |
1551 | */ | 1553 | */ |
1552 | if (nic->device_type == XFRAME_II_DEVICE) { | 1554 | if (nic->device_type == XFRAME_II_DEVICE) { |
1553 | val64 = WREQ_SPLIT_MASK_SET_MASK(255); | 1555 | val64 = EXT_REQ_EN | MISC_LINK_STABILITY_PRD(3); |
1554 | writeq(val64, &bar0->wreq_split_mask); | 1556 | writeq(val64, &bar0->misc_control); |
1557 | val64 = readq(&bar0->pic_control2); | ||
1558 | val64 &= ~(BIT(13)|BIT(14)|BIT(15)); | ||
1559 | writeq(val64, &bar0->pic_control2); | ||
1555 | } | 1560 | } |
1556 | 1561 | ||
1557 | /* Setting Link stability period to 64 ms */ | 1562 | /* Setting Link stability period to 64 ms */ |
@@ -1948,6 +1953,10 @@ static int start_nic(struct s2io_nic *nic) | |||
1948 | val64 |= PRC_CTRL_RC_ENABLED; | 1953 | val64 |= PRC_CTRL_RC_ENABLED; |
1949 | else | 1954 | else |
1950 | val64 |= PRC_CTRL_RC_ENABLED | PRC_CTRL_RING_MODE_3; | 1955 | val64 |= PRC_CTRL_RC_ENABLED | PRC_CTRL_RING_MODE_3; |
1956 | if (nic->device_type == XFRAME_II_DEVICE) | ||
1957 | val64 |= PRC_CTRL_GROUP_READS; | ||
1958 | val64 &= ~PRC_CTRL_RXD_BACKOFF_INTERVAL(0xFFFFFF); | ||
1959 | val64 |= PRC_CTRL_RXD_BACKOFF_INTERVAL(0x1000); | ||
1951 | writeq(val64, &bar0->prc_ctrl_n[i]); | 1960 | writeq(val64, &bar0->prc_ctrl_n[i]); |
1952 | } | 1961 | } |
1953 | 1962 | ||
@@ -2231,13 +2240,12 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) | |||
2231 | alloc_cnt = mac_control->rings[ring_no].pkt_cnt - | 2240 | alloc_cnt = mac_control->rings[ring_no].pkt_cnt - |
2232 | atomic_read(&nic->rx_bufs_left[ring_no]); | 2241 | atomic_read(&nic->rx_bufs_left[ring_no]); |
2233 | 2242 | ||
2243 | block_no1 = mac_control->rings[ring_no].rx_curr_get_info.block_index; | ||
2244 | off1 = mac_control->rings[ring_no].rx_curr_get_info.offset; | ||
2234 | while (alloc_tab < alloc_cnt) { | 2245 | while (alloc_tab < alloc_cnt) { |
2235 | block_no = mac_control->rings[ring_no].rx_curr_put_info. | 2246 | block_no = mac_control->rings[ring_no].rx_curr_put_info. |
2236 | block_index; | 2247 | block_index; |
2237 | block_no1 = mac_control->rings[ring_no].rx_curr_get_info. | ||
2238 | block_index; | ||
2239 | off = mac_control->rings[ring_no].rx_curr_put_info.offset; | 2248 | off = mac_control->rings[ring_no].rx_curr_put_info.offset; |
2240 | off1 = mac_control->rings[ring_no].rx_curr_get_info.offset; | ||
2241 | 2249 | ||
2242 | rxdp = mac_control->rings[ring_no]. | 2250 | rxdp = mac_control->rings[ring_no]. |
2243 | rx_blocks[block_no].rxds[off].virt_addr; | 2251 | rx_blocks[block_no].rxds[off].virt_addr; |
@@ -2307,9 +2315,9 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) | |||
2307 | memset(rxdp, 0, sizeof(RxD1_t)); | 2315 | memset(rxdp, 0, sizeof(RxD1_t)); |
2308 | skb_reserve(skb, NET_IP_ALIGN); | 2316 | skb_reserve(skb, NET_IP_ALIGN); |
2309 | ((RxD1_t*)rxdp)->Buffer0_ptr = pci_map_single | 2317 | ((RxD1_t*)rxdp)->Buffer0_ptr = pci_map_single |
2310 | (nic->pdev, skb->data, size, PCI_DMA_FROMDEVICE); | 2318 | (nic->pdev, skb->data, size - NET_IP_ALIGN, |
2311 | rxdp->Control_2 &= (~MASK_BUFFER0_SIZE_1); | 2319 | PCI_DMA_FROMDEVICE); |
2312 | rxdp->Control_2 |= SET_BUFFER0_SIZE_1(size); | 2320 | rxdp->Control_2 = SET_BUFFER0_SIZE_1(size - NET_IP_ALIGN); |
2313 | 2321 | ||
2314 | } else if (nic->rxd_mode >= RXD_MODE_3A) { | 2322 | } else if (nic->rxd_mode >= RXD_MODE_3A) { |
2315 | /* | 2323 | /* |
@@ -2516,7 +2524,7 @@ static int s2io_poll(struct net_device *dev, int *budget) | |||
2516 | mac_info_t *mac_control; | 2524 | mac_info_t *mac_control; |
2517 | struct config_param *config; | 2525 | struct config_param *config; |
2518 | XENA_dev_config_t __iomem *bar0 = nic->bar0; | 2526 | XENA_dev_config_t __iomem *bar0 = nic->bar0; |
2519 | u64 val64; | 2527 | u64 val64 = 0xFFFFFFFFFFFFFFFFULL; |
2520 | int i; | 2528 | int i; |
2521 | 2529 | ||
2522 | atomic_inc(&nic->isr_cnt); | 2530 | atomic_inc(&nic->isr_cnt); |
@@ -2528,8 +2536,8 @@ static int s2io_poll(struct net_device *dev, int *budget) | |||
2528 | nic->pkts_to_process = dev->quota; | 2536 | nic->pkts_to_process = dev->quota; |
2529 | org_pkts_to_process = nic->pkts_to_process; | 2537 | org_pkts_to_process = nic->pkts_to_process; |
2530 | 2538 | ||
2531 | val64 = readq(&bar0->rx_traffic_int); | ||
2532 | writeq(val64, &bar0->rx_traffic_int); | 2539 | writeq(val64, &bar0->rx_traffic_int); |
2540 | val64 = readl(&bar0->rx_traffic_int); | ||
2533 | 2541 | ||
2534 | for (i = 0; i < config->rx_ring_num; i++) { | 2542 | for (i = 0; i < config->rx_ring_num; i++) { |
2535 | rx_intr_handler(&mac_control->rings[i]); | 2543 | rx_intr_handler(&mac_control->rings[i]); |
@@ -2666,6 +2674,7 @@ static void rx_intr_handler(ring_info_t *ring_data) | |||
2666 | ((RxD3_t*)rxdp)->Buffer2_ptr, | 2674 | ((RxD3_t*)rxdp)->Buffer2_ptr, |
2667 | dev->mtu, PCI_DMA_FROMDEVICE); | 2675 | dev->mtu, PCI_DMA_FROMDEVICE); |
2668 | } | 2676 | } |
2677 | prefetch(skb->data); | ||
2669 | rx_osm_handler(ring_data, rxdp); | 2678 | rx_osm_handler(ring_data, rxdp); |
2670 | get_info.offset++; | 2679 | get_info.offset++; |
2671 | ring_data->rx_curr_get_info.offset = get_info.offset; | 2680 | ring_data->rx_curr_get_info.offset = get_info.offset; |
@@ -2760,7 +2769,8 @@ to loss of link\n"); | |||
2760 | dev_kfree_skb_irq(skb); | 2769 | dev_kfree_skb_irq(skb); |
2761 | 2770 | ||
2762 | get_info.offset++; | 2771 | get_info.offset++; |
2763 | get_info.offset %= get_info.fifo_len + 1; | 2772 | if (get_info.offset == get_info.fifo_len + 1) |
2773 | get_info.offset = 0; | ||
2764 | txdlp = (TxD_t *) fifo_data->list_info | 2774 | txdlp = (TxD_t *) fifo_data->list_info |
2765 | [get_info.offset].list_virt_addr; | 2775 | [get_info.offset].list_virt_addr; |
2766 | fifo_data->tx_curr_get_info.offset = | 2776 | fifo_data->tx_curr_get_info.offset = |
@@ -3545,7 +3555,8 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) | |||
3545 | 3555 | ||
3546 | queue_len = mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1; | 3556 | queue_len = mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1; |
3547 | /* Avoid "put" pointer going beyond "get" pointer */ | 3557 | /* Avoid "put" pointer going beyond "get" pointer */ |
3548 | if (txdp->Host_Control || (((put_off + 1) % queue_len) == get_off)) { | 3558 | if (txdp->Host_Control || |
3559 | ((put_off+1) == queue_len ? 0 : (put_off+1)) == get_off) { | ||
3549 | DBG_PRINT(TX_DBG, "Error in xmit, No free TXDs.\n"); | 3560 | DBG_PRINT(TX_DBG, "Error in xmit, No free TXDs.\n"); |
3550 | netif_stop_queue(dev); | 3561 | netif_stop_queue(dev); |
3551 | dev_kfree_skb(skb); | 3562 | dev_kfree_skb(skb); |
@@ -3655,11 +3666,12 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) | |||
3655 | mmiowb(); | 3666 | mmiowb(); |
3656 | 3667 | ||
3657 | put_off++; | 3668 | put_off++; |
3658 | put_off %= mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1; | 3669 | if (put_off == mac_control->fifos[queue].tx_curr_put_info.fifo_len + 1) |
3670 | put_off = 0; | ||
3659 | mac_control->fifos[queue].tx_curr_put_info.offset = put_off; | 3671 | mac_control->fifos[queue].tx_curr_put_info.offset = put_off; |
3660 | 3672 | ||
3661 | /* Avoid "put" pointer going beyond "get" pointer */ | 3673 | /* Avoid "put" pointer going beyond "get" pointer */ |
3662 | if (((put_off + 1) % queue_len) == get_off) { | 3674 | if (((put_off+1) == queue_len ? 0 : (put_off+1)) == get_off) { |
3663 | DBG_PRINT(TX_DBG, | 3675 | DBG_PRINT(TX_DBG, |
3664 | "No free TxDs for xmit, Put: 0x%x Get:0x%x\n", | 3676 | "No free TxDs for xmit, Put: 0x%x Get:0x%x\n", |
3665 | put_off, get_off); | 3677 | put_off, get_off); |
@@ -3887,43 +3899,37 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) | |||
3887 | return IRQ_NONE; | 3899 | return IRQ_NONE; |
3888 | } | 3900 | } |
3889 | 3901 | ||
3902 | val64 = 0xFFFFFFFFFFFFFFFFULL; | ||
3890 | #ifdef CONFIG_S2IO_NAPI | 3903 | #ifdef CONFIG_S2IO_NAPI |
3891 | if (reason & GEN_INTR_RXTRAFFIC) { | 3904 | if (reason & GEN_INTR_RXTRAFFIC) { |
3892 | if (netif_rx_schedule_prep(dev)) { | 3905 | if (netif_rx_schedule_prep(dev)) { |
3893 | en_dis_able_nic_intrs(sp, RX_TRAFFIC_INTR, | 3906 | writeq(val64, &bar0->rx_traffic_mask); |
3894 | DISABLE_INTRS); | ||
3895 | __netif_rx_schedule(dev); | 3907 | __netif_rx_schedule(dev); |
3896 | } | 3908 | } |
3897 | } | 3909 | } |
3898 | #else | 3910 | #else |
3899 | /* If Intr is because of Rx Traffic */ | 3911 | /* |
3900 | if (reason & GEN_INTR_RXTRAFFIC) { | 3912 | * Rx handler is called by default, without checking for the |
3901 | /* | 3913 | * cause of interrupt. |
3902 | * rx_traffic_int reg is an R1 register, writing all 1's | 3914 | * rx_traffic_int reg is an R1 register, writing all 1's |
3903 | * will ensure that the actual interrupt causing bit get's | 3915 | * will ensure that the actual interrupt causing bit get's |
3904 | * cleared and hence a read can be avoided. | 3916 | * cleared and hence a read can be avoided. |
3905 | */ | 3917 | */ |
3906 | val64 = 0xFFFFFFFFFFFFFFFFULL; | 3918 | writeq(val64, &bar0->rx_traffic_int); |
3907 | writeq(val64, &bar0->rx_traffic_int); | 3919 | for (i = 0; i < config->rx_ring_num; i++) { |
3908 | for (i = 0; i < config->rx_ring_num; i++) { | 3920 | rx_intr_handler(&mac_control->rings[i]); |
3909 | rx_intr_handler(&mac_control->rings[i]); | ||
3910 | } | ||
3911 | } | 3921 | } |
3912 | #endif | 3922 | #endif |
3913 | 3923 | ||
3914 | /* If Intr is because of Tx Traffic */ | 3924 | /* |
3915 | if (reason & GEN_INTR_TXTRAFFIC) { | 3925 | * tx_traffic_int reg is an R1 register, writing all 1's |
3916 | /* | 3926 | * will ensure that the actual interrupt causing bit get's |
3917 | * tx_traffic_int reg is an R1 register, writing all 1's | 3927 | * cleared and hence a read can be avoided. |
3918 | * will ensure that the actual interrupt causing bit get's | 3928 | */ |
3919 | * cleared and hence a read can be avoided. | 3929 | writeq(val64, &bar0->tx_traffic_int); |
3920 | */ | ||
3921 | val64 = 0xFFFFFFFFFFFFFFFFULL; | ||
3922 | writeq(val64, &bar0->tx_traffic_int); | ||
3923 | 3930 | ||
3924 | for (i = 0; i < config->tx_fifo_num; i++) | 3931 | for (i = 0; i < config->tx_fifo_num; i++) |
3925 | tx_intr_handler(&mac_control->fifos[i]); | 3932 | tx_intr_handler(&mac_control->fifos[i]); |
3926 | } | ||
3927 | 3933 | ||
3928 | if (reason & GEN_INTR_TXPIC) | 3934 | if (reason & GEN_INTR_TXPIC) |
3929 | s2io_txpic_intr_handle(sp); | 3935 | s2io_txpic_intr_handle(sp); |
@@ -5695,18 +5701,27 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) | |||
5695 | ((unsigned long) rxdp->Host_Control); | 5701 | ((unsigned long) rxdp->Host_Control); |
5696 | int ring_no = ring_data->ring_no; | 5702 | int ring_no = ring_data->ring_no; |
5697 | u16 l3_csum, l4_csum; | 5703 | u16 l3_csum, l4_csum; |
5704 | unsigned long long err = rxdp->Control_1 & RXD_T_CODE; | ||
5698 | lro_t *lro; | 5705 | lro_t *lro; |
5699 | 5706 | ||
5700 | skb->dev = dev; | 5707 | skb->dev = dev; |
5701 | if (rxdp->Control_1 & RXD_T_CODE) { | 5708 | if (err) { |
5702 | unsigned long long err = rxdp->Control_1 & RXD_T_CODE; | 5709 | /* |
5703 | DBG_PRINT(ERR_DBG, "%s: Rx error Value: 0x%llx\n", | 5710 | * Drop the packet if bad transfer code. Exception being |
5704 | dev->name, err); | 5711 | * 0x5, which could be due to unsupported IPv6 extension header. |
5705 | dev_kfree_skb(skb); | 5712 | * In this case, we let stack handle the packet. |
5706 | sp->stats.rx_crc_errors++; | 5713 | * Note that in this case, since checksum will be incorrect, |
5707 | atomic_dec(&sp->rx_bufs_left[ring_no]); | 5714 | * stack will validate the same. |
5708 | rxdp->Host_Control = 0; | 5715 | */ |
5709 | return 0; | 5716 | if (err && ((err >> 48) != 0x5)) { |
5717 | DBG_PRINT(ERR_DBG, "%s: Rx error Value: 0x%llx\n", | ||
5718 | dev->name, err); | ||
5719 | sp->stats.rx_crc_errors++; | ||
5720 | dev_kfree_skb(skb); | ||
5721 | atomic_dec(&sp->rx_bufs_left[ring_no]); | ||
5722 | rxdp->Host_Control = 0; | ||
5723 | return 0; | ||
5724 | } | ||
5710 | } | 5725 | } |
5711 | 5726 | ||
5712 | /* Updating statistics */ | 5727 | /* Updating statistics */ |
@@ -5918,13 +5933,6 @@ static void s2io_init_pci(nic_t * sp) | |||
5918 | pci_write_config_word(sp->pdev, PCI_COMMAND, | 5933 | pci_write_config_word(sp->pdev, PCI_COMMAND, |
5919 | (pci_cmd | PCI_COMMAND_PARITY)); | 5934 | (pci_cmd | PCI_COMMAND_PARITY)); |
5920 | pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd); | 5935 | pci_read_config_word(sp->pdev, PCI_COMMAND, &pci_cmd); |
5921 | |||
5922 | /* Forcibly disabling relaxed ordering capability of the card. */ | ||
5923 | pcix_cmd &= 0xfffd; | ||
5924 | pci_write_config_word(sp->pdev, PCIX_COMMAND_REGISTER, | ||
5925 | pcix_cmd); | ||
5926 | pci_read_config_word(sp->pdev, PCIX_COMMAND_REGISTER, | ||
5927 | &(pcix_cmd)); | ||
5928 | } | 5936 | } |
5929 | 5937 | ||
5930 | MODULE_AUTHOR("Raghavendra Koushik <raghavendra.koushik@neterion.com>"); | 5938 | MODULE_AUTHOR("Raghavendra Koushik <raghavendra.koushik@neterion.com>"); |