diff options
author | Fugang Duan <B38611@freescale.com> | 2014-04-01 01:55:09 -0400 |
---|---|---|
committer | Nitin Garg <nitin.garg@freescale.com> | 2014-04-16 09:58:19 -0400 |
commit | abefc6726048470914ee32388e5be6f6993090a6 (patch) | |
tree | 44fe44fcd6ee2cd40183946a6b7c84760767d214 | |
parent | eb57d56ada1cda08b3c621861c84c72b90fa06fc (diff) |
ENGR00305366-04 net: fec: optimize imx6sx enet performance
This patch makes the following changes to improve enet performance:
- Enable GRO by default. The feature can be accessed via ethtool.
- In the enet NAPI callback, check the interrupt events before calling the
tx/rx ring clean functions.
- For frequently accessed registers, use __raw_writel/__raw_readl instead
of writel/readl. When writing the tx/rx trigger register, add dmb()
to guarantee the ordering.
After the optimization, under the following conditions:
- CPU frequency is 996MHz, cpufreq governor is performance.
- Connect to FPGA board.
The imx6sx enet TCP performance results are:
TX: 867Mbps, CPU load close to 100%.
RX: 940Mbps, CPU load close to 92%.
Signed-off-by: Fugang Duan <B38611@freescale.com>
-rw-r--r-- | drivers/net/ethernet/freescale/fec_main.c | 71 |
1 files changed, 48 insertions, 23 deletions
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c337e26cde9c..5eab33668b32 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c | |||
@@ -304,6 +304,9 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) | |||
304 | unsigned short queue; | 304 | unsigned short queue; |
305 | void *bufaddr; | 305 | void *bufaddr; |
306 | unsigned short status; | 306 | unsigned short status; |
307 | unsigned int status_esc; | ||
308 | unsigned int bdbuf_len; | ||
309 | unsigned int bdbuf_addr; | ||
307 | unsigned int index; | 310 | unsigned int index; |
308 | 311 | ||
309 | queue = skb_get_queue_mapping(skb); | 312 | queue = skb_get_queue_mapping(skb); |
@@ -334,7 +337,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) | |||
334 | 337 | ||
335 | /* Set buffer length and buffer pointer */ | 338 | /* Set buffer length and buffer pointer */ |
336 | bufaddr = skb->data; | 339 | bufaddr = skb->data; |
337 | bdp->cbd_datlen = skb->len; | 340 | bdbuf_len = skb->len; |
338 | 341 | ||
339 | /* | 342 | /* |
340 | * On some FEC implementations data must be aligned on | 343 | * On some FEC implementations data must be aligned on |
@@ -367,7 +370,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) | |||
367 | /* Push the data cache so the CPM does not get stale memory | 370 | /* Push the data cache so the CPM does not get stale memory |
368 | * data. | 371 | * data. |
369 | */ | 372 | */ |
370 | bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr, | 373 | bdbuf_addr = dma_map_single(&fep->pdev->dev, bufaddr, |
371 | skb->len, DMA_TO_DEVICE); | 374 | skb->len, DMA_TO_DEVICE); |
372 | if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) { | 375 | if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) { |
373 | bdp->cbd_bufaddr = 0; | 376 | bdp->cbd_bufaddr = 0; |
@@ -378,27 +381,34 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) | |||
378 | if (fep->bufdesc_ex) { | 381 | if (fep->bufdesc_ex) { |
379 | 382 | ||
380 | struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; | 383 | struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; |
381 | ebdp->cbd_bdu = 0; | 384 | |
382 | if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && | 385 | if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && |
383 | fep->hwts_tx_en) || unlikely(fep->hwts_tx_en_ioctl && | 386 | fep->hwts_tx_en) || unlikely(fep->hwts_tx_en_ioctl && |
384 | fec_ptp_do_txstamp(skb))) { | 387 | fec_ptp_do_txstamp(skb))) { |
385 | ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT); | 388 | status_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT); |
386 | skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; | 389 | skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; |
387 | } else { | 390 | } else { |
388 | ebdp->cbd_esc = BD_ENET_TX_INT; | 391 | status_esc = BD_ENET_TX_INT; |
389 | 392 | ||
390 | /* Enable protocol checksum flags | 393 | /* Enable protocol checksum flags |
391 | * We do not bother with the IP Checksum bits as they | 394 | * We do not bother with the IP Checksum bits as they |
392 | * are done by the kernel | 395 | * are done by the kernel |
393 | */ | 396 | */ |
394 | if (skb->ip_summed == CHECKSUM_PARTIAL) | 397 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
395 | ebdp->cbd_esc |= BD_ENET_TX_PINS; | 398 | status_esc |= BD_ENET_TX_PINS; |
396 | } | 399 | } |
397 | 400 | ||
398 | if (id_entry->driver_data & FEC_QUIRK_HAS_AVB) | 401 | if (id_entry->driver_data & FEC_QUIRK_HAS_AVB) |
399 | ebdp->cbd_esc |= FEC_TX_BD_FTYPE(queue); | 402 | status_esc |= FEC_TX_BD_FTYPE(queue); |
403 | |||
404 | ebdp->cbd_bdu = 0; | ||
405 | ebdp->cbd_esc = status_esc; | ||
400 | } | 406 | } |
401 | 407 | ||
408 | bdp->cbd_bufaddr = bdbuf_addr; | ||
409 | bdp->cbd_datlen = bdbuf_len; | ||
410 | dmb(); | ||
411 | |||
402 | /* Send it on its way. Tell FEC it's ready, interrupt when done, | 412 | /* Send it on its way. Tell FEC it's ready, interrupt when done, |
403 | * it's the last BD of the frame, and to put the CRC on the end. | 413 | * it's the last BD of the frame, and to put the CRC on the end. |
404 | */ | 414 | */ |
@@ -426,11 +436,13 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) | |||
426 | 436 | ||
427 | /* Trigger transmission start */ | 437 | /* Trigger transmission start */ |
428 | if (!(id_entry->driver_data & FEC_QUIRK_TKT210582) || | 438 | if (!(id_entry->driver_data & FEC_QUIRK_TKT210582) || |
429 | !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || | 439 | !__raw_readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || |
430 | !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || | 440 | !__raw_readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || |
431 | !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || | 441 | !__raw_readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || |
432 | !readl(fep->hwp + FEC_X_DES_ACTIVE(queue))) | 442 | !__raw_readl(fep->hwp + FEC_X_DES_ACTIVE(queue))) { |
433 | writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue)); | 443 | dmb(); |
444 | __raw_writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue)); | ||
445 | } | ||
434 | 446 | ||
435 | return NETDEV_TX_OK; | 447 | return NETDEV_TX_OK; |
436 | } | 448 | } |
@@ -1137,7 +1149,6 @@ rx_processing_done: | |||
1137 | 1149 | ||
1138 | /* Mark the buffer empty */ | 1150 | /* Mark the buffer empty */ |
1139 | status |= BD_ENET_RX_EMPTY; | 1151 | status |= BD_ENET_RX_EMPTY; |
1140 | bdp->cbd_sc = status; | ||
1141 | 1152 | ||
1142 | if (fep->bufdesc_ex) { | 1153 | if (fep->bufdesc_ex) { |
1143 | struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; | 1154 | struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; |
@@ -1147,14 +1158,21 @@ rx_processing_done: | |||
1147 | ebdp->cbd_bdu = 0; | 1158 | ebdp->cbd_bdu = 0; |
1148 | } | 1159 | } |
1149 | 1160 | ||
1150 | /* Update BD pointer to next entry */ | 1161 | dmb(); |
1151 | bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); | 1162 | |
1163 | bdp->cbd_sc = status; | ||
1152 | 1164 | ||
1153 | /* Doing this here will keep the FEC running while we process | 1165 | /* Doing this here will keep the FEC running while we process |
1154 | * incoming frames. On a heavily loaded network, we should be | 1166 | * incoming frames. On a heavily loaded network, we should be |
1155 | * able to keep up at the expense of system resources. | 1167 | * able to keep up at the expense of system resources. |
1156 | */ | 1168 | */ |
1157 | writel(0, fep->hwp + FEC_R_DES_ACTIVE(queue_id)); | 1169 | if (!__raw_readl(fep->hwp + FEC_R_DES_ACTIVE(queue_id))) { |
1170 | dmb(); | ||
1171 | __raw_writel(0, fep->hwp + FEC_R_DES_ACTIVE(queue_id)); | ||
1172 | } | ||
1173 | |||
1174 | /* Update BD pointer to next entry */ | ||
1175 | bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); | ||
1158 | } | 1176 | } |
1159 | rxq->cur_rx = bdp; | 1177 | rxq->cur_rx = bdp; |
1160 | } | 1178 | } |
@@ -1166,8 +1184,8 @@ static bool fec_enet_collect_events(struct fec_enet_private *fep) | |||
1166 | { | 1184 | { |
1167 | uint int_events; | 1185 | uint int_events; |
1168 | 1186 | ||
1169 | int_events = readl(fep->hwp + FEC_IEVENT); | 1187 | int_events = __raw_readl(fep->hwp + FEC_IEVENT); |
1170 | writel(int_events & (~FEC_ENET_TS_TIMER), | 1188 | __raw_writel(int_events & (~FEC_ENET_TS_TIMER), |
1171 | fep->hwp + FEC_IEVENT); | 1189 | fep->hwp + FEC_IEVENT); |
1172 | 1190 | ||
1173 | if (int_events == 0) | 1191 | if (int_events == 0) |
@@ -1212,7 +1230,7 @@ fec_enet_interrupt(int irq, void *dev_id) | |||
1212 | if (fep->hwts_tx_en_ioctl || fep->hwts_rx_en_ioctl) | 1230 | if (fep->hwts_tx_en_ioctl || fep->hwts_rx_en_ioctl) |
1213 | fep->prtc++; | 1231 | fep->prtc++; |
1214 | 1232 | ||
1215 | writel(FEC_ENET_TS_TIMER, fep->hwp + FEC_IEVENT); | 1233 | __raw_writel(FEC_ENET_TS_TIMER, fep->hwp + FEC_IEVENT); |
1216 | fep->work_ts = 0; | 1234 | fep->work_ts = 0; |
1217 | } | 1235 | } |
1218 | 1236 | ||
@@ -1221,7 +1239,7 @@ fec_enet_interrupt(int irq, void *dev_id) | |||
1221 | 1239 | ||
1222 | /* Disable the RX interrupt */ | 1240 | /* Disable the RX interrupt */ |
1223 | if (napi_schedule_prep(&fep->napi)) { | 1241 | if (napi_schedule_prep(&fep->napi)) { |
1224 | writel(FEC_RX_DISABLED_IMASK, | 1242 | __raw_writel(FEC_RX_DISABLED_IMASK, |
1225 | fep->hwp + FEC_IMASK); | 1243 | fep->hwp + FEC_IMASK); |
1226 | __napi_schedule(&fep->napi); | 1244 | __napi_schedule(&fep->napi); |
1227 | } | 1245 | } |
@@ -1240,14 +1258,17 @@ fec_enet_interrupt(int irq, void *dev_id) | |||
1240 | static int fec_enet_rx_napi(struct napi_struct *napi, int budget) | 1258 | static int fec_enet_rx_napi(struct napi_struct *napi, int budget) |
1241 | { | 1259 | { |
1242 | struct net_device *ndev = napi->dev; | 1260 | struct net_device *ndev = napi->dev; |
1243 | int pkts = fec_enet_rx(ndev, budget); | ||
1244 | struct fec_enet_private *fep = netdev_priv(ndev); | 1261 | struct fec_enet_private *fep = netdev_priv(ndev); |
1262 | int pkts = 0; | ||
1245 | 1263 | ||
1246 | fec_enet_tx(ndev); | 1264 | if (fep->work_rx) |
1265 | pkts = fec_enet_rx(ndev, budget); | ||
1266 | if (fep->work_tx) | ||
1267 | fec_enet_tx(ndev); | ||
1247 | 1268 | ||
1248 | if (pkts < budget) { | 1269 | if (pkts < budget) { |
1249 | napi_complete(napi); | 1270 | napi_complete(napi); |
1250 | writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); | 1271 | __raw_writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); |
1251 | } | 1272 | } |
1252 | return pkts; | 1273 | return pkts; |
1253 | } | 1274 | } |
@@ -2483,6 +2504,10 @@ static int fec_enet_init(struct net_device *ndev) | |||
2483 | fep->csum_flags |= FLAG_RX_CSUM_ENABLED; | 2504 | fep->csum_flags |= FLAG_RX_CSUM_ENABLED; |
2484 | } | 2505 | } |
2485 | 2506 | ||
2507 | /* enable GRO in default */ | ||
2508 | ndev->features |= NETIF_F_GRO; | ||
2509 | ndev->hw_features |= NETIF_F_GRO; | ||
2510 | |||
2486 | fec_restart(ndev, 0); | 2511 | fec_restart(ndev, 0); |
2487 | 2512 | ||
2488 | return 0; | 2513 | return 0; |