aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFugang Duan <B38611@freescale.com>2014-04-01 01:55:09 -0400
committerNitin Garg <nitin.garg@freescale.com>2014-04-16 09:58:19 -0400
commitabefc6726048470914ee32388e5be6f6993090a6 (patch)
tree44fe44fcd6ee2cd40183946a6b7c84760767d214
parenteb57d56ada1cda08b3c621861c84c72b90fa06fc (diff)
ENGR00305366-04 net: fec: optimize imx6sx enet performance
The patch makes the below changes for enet performance improvement: - Enable GRO by default. The feature can be accessed by ethtool. - In the enet napi callback, check interrupts to call the tx/rx clean ring functions. - For high-rate register access, use __raw_writel/__raw_readl instead of writel/readl. When writing the trigger register for tx/rx, add dmb() to ensure ordering. After the optimization, under the below conditions: - cpu frequency is 996MHz, cpufreq governor is performance. - Connected to an FPGA board. The imx6sx enet tcp performance results: TX: 867Mbps, cpu loading near 100%. RX: 940Mbps, cpu loading near 92%. Signed-off-by: Fugang Duan <B38611@freescale.com>
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c71
1 file changed, 48 insertions, 23 deletions
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index c337e26cde9c..5eab33668b32 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -304,6 +304,9 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
304 unsigned short queue; 304 unsigned short queue;
305 void *bufaddr; 305 void *bufaddr;
306 unsigned short status; 306 unsigned short status;
307 unsigned int status_esc;
308 unsigned int bdbuf_len;
309 unsigned int bdbuf_addr;
307 unsigned int index; 310 unsigned int index;
308 311
309 queue = skb_get_queue_mapping(skb); 312 queue = skb_get_queue_mapping(skb);
@@ -334,7 +337,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
334 337
335 /* Set buffer length and buffer pointer */ 338 /* Set buffer length and buffer pointer */
336 bufaddr = skb->data; 339 bufaddr = skb->data;
337 bdp->cbd_datlen = skb->len; 340 bdbuf_len = skb->len;
338 341
339 /* 342 /*
340 * On some FEC implementations data must be aligned on 343 * On some FEC implementations data must be aligned on
@@ -367,7 +370,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
367 /* Push the data cache so the CPM does not get stale memory 370 /* Push the data cache so the CPM does not get stale memory
368 * data. 371 * data.
369 */ 372 */
370 bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr, 373 bdbuf_addr = dma_map_single(&fep->pdev->dev, bufaddr,
371 skb->len, DMA_TO_DEVICE); 374 skb->len, DMA_TO_DEVICE);
372 if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) { 375 if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
373 bdp->cbd_bufaddr = 0; 376 bdp->cbd_bufaddr = 0;
@@ -378,27 +381,34 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
378 if (fep->bufdesc_ex) { 381 if (fep->bufdesc_ex) {
379 382
380 struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 383 struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
381 ebdp->cbd_bdu = 0; 384
382 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && 385 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
383 fep->hwts_tx_en) || unlikely(fep->hwts_tx_en_ioctl && 386 fep->hwts_tx_en) || unlikely(fep->hwts_tx_en_ioctl &&
384 fec_ptp_do_txstamp(skb))) { 387 fec_ptp_do_txstamp(skb))) {
385 ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT); 388 status_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT);
386 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 389 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
387 } else { 390 } else {
388 ebdp->cbd_esc = BD_ENET_TX_INT; 391 status_esc = BD_ENET_TX_INT;
389 392
390 /* Enable protocol checksum flags 393 /* Enable protocol checksum flags
391 * We do not bother with the IP Checksum bits as they 394 * We do not bother with the IP Checksum bits as they
392 * are done by the kernel 395 * are done by the kernel
393 */ 396 */
394 if (skb->ip_summed == CHECKSUM_PARTIAL) 397 if (skb->ip_summed == CHECKSUM_PARTIAL)
395 ebdp->cbd_esc |= BD_ENET_TX_PINS; 398 status_esc |= BD_ENET_TX_PINS;
396 } 399 }
397 400
398 if (id_entry->driver_data & FEC_QUIRK_HAS_AVB) 401 if (id_entry->driver_data & FEC_QUIRK_HAS_AVB)
399 ebdp->cbd_esc |= FEC_TX_BD_FTYPE(queue); 402 status_esc |= FEC_TX_BD_FTYPE(queue);
403
404 ebdp->cbd_bdu = 0;
405 ebdp->cbd_esc = status_esc;
400 } 406 }
401 407
408 bdp->cbd_bufaddr = bdbuf_addr;
409 bdp->cbd_datlen = bdbuf_len;
410 dmb();
411
402 /* Send it on its way. Tell FEC it's ready, interrupt when done, 412 /* Send it on its way. Tell FEC it's ready, interrupt when done,
403 * it's the last BD of the frame, and to put the CRC on the end. 413 * it's the last BD of the frame, and to put the CRC on the end.
404 */ 414 */
@@ -426,11 +436,13 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
426 436
427 /* Trigger transmission start */ 437 /* Trigger transmission start */
428 if (!(id_entry->driver_data & FEC_QUIRK_TKT210582) || 438 if (!(id_entry->driver_data & FEC_QUIRK_TKT210582) ||
429 !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || 439 !__raw_readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) ||
430 !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || 440 !__raw_readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) ||
431 !readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) || 441 !__raw_readl(fep->hwp + FEC_X_DES_ACTIVE(queue)) ||
432 !readl(fep->hwp + FEC_X_DES_ACTIVE(queue))) 442 !__raw_readl(fep->hwp + FEC_X_DES_ACTIVE(queue))) {
433 writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue)); 443 dmb();
444 __raw_writel(0, fep->hwp + FEC_X_DES_ACTIVE(queue));
445 }
434 446
435 return NETDEV_TX_OK; 447 return NETDEV_TX_OK;
436} 448}
@@ -1137,7 +1149,6 @@ rx_processing_done:
1137 1149
1138 /* Mark the buffer empty */ 1150 /* Mark the buffer empty */
1139 status |= BD_ENET_RX_EMPTY; 1151 status |= BD_ENET_RX_EMPTY;
1140 bdp->cbd_sc = status;
1141 1152
1142 if (fep->bufdesc_ex) { 1153 if (fep->bufdesc_ex) {
1143 struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 1154 struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
@@ -1147,14 +1158,21 @@ rx_processing_done:
1147 ebdp->cbd_bdu = 0; 1158 ebdp->cbd_bdu = 0;
1148 } 1159 }
1149 1160
1150 /* Update BD pointer to next entry */ 1161 dmb();
1151 bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); 1162
1163 bdp->cbd_sc = status;
1152 1164
1153 /* Doing this here will keep the FEC running while we process 1165 /* Doing this here will keep the FEC running while we process
1154 * incoming frames. On a heavily loaded network, we should be 1166 * incoming frames. On a heavily loaded network, we should be
1155 * able to keep up at the expense of system resources. 1167 * able to keep up at the expense of system resources.
1156 */ 1168 */
1157 writel(0, fep->hwp + FEC_R_DES_ACTIVE(queue_id)); 1169 if (!__raw_readl(fep->hwp + FEC_R_DES_ACTIVE(queue_id))) {
1170 dmb();
1171 __raw_writel(0, fep->hwp + FEC_R_DES_ACTIVE(queue_id));
1172 }
1173
1174 /* Update BD pointer to next entry */
1175 bdp = fec_enet_get_nextdesc(bdp, fep, queue_id);
1158 } 1176 }
1159 rxq->cur_rx = bdp; 1177 rxq->cur_rx = bdp;
1160 } 1178 }
@@ -1166,8 +1184,8 @@ static bool fec_enet_collect_events(struct fec_enet_private *fep)
1166{ 1184{
1167 uint int_events; 1185 uint int_events;
1168 1186
1169 int_events = readl(fep->hwp + FEC_IEVENT); 1187 int_events = __raw_readl(fep->hwp + FEC_IEVENT);
1170 writel(int_events & (~FEC_ENET_TS_TIMER), 1188 __raw_writel(int_events & (~FEC_ENET_TS_TIMER),
1171 fep->hwp + FEC_IEVENT); 1189 fep->hwp + FEC_IEVENT);
1172 1190
1173 if (int_events == 0) 1191 if (int_events == 0)
@@ -1212,7 +1230,7 @@ fec_enet_interrupt(int irq, void *dev_id)
1212 if (fep->hwts_tx_en_ioctl || fep->hwts_rx_en_ioctl) 1230 if (fep->hwts_tx_en_ioctl || fep->hwts_rx_en_ioctl)
1213 fep->prtc++; 1231 fep->prtc++;
1214 1232
1215 writel(FEC_ENET_TS_TIMER, fep->hwp + FEC_IEVENT); 1233 __raw_writel(FEC_ENET_TS_TIMER, fep->hwp + FEC_IEVENT);
1216 fep->work_ts = 0; 1234 fep->work_ts = 0;
1217 } 1235 }
1218 1236
@@ -1221,7 +1239,7 @@ fec_enet_interrupt(int irq, void *dev_id)
1221 1239
1222 /* Disable the RX interrupt */ 1240 /* Disable the RX interrupt */
1223 if (napi_schedule_prep(&fep->napi)) { 1241 if (napi_schedule_prep(&fep->napi)) {
1224 writel(FEC_RX_DISABLED_IMASK, 1242 __raw_writel(FEC_RX_DISABLED_IMASK,
1225 fep->hwp + FEC_IMASK); 1243 fep->hwp + FEC_IMASK);
1226 __napi_schedule(&fep->napi); 1244 __napi_schedule(&fep->napi);
1227 } 1245 }
@@ -1240,14 +1258,17 @@ fec_enet_interrupt(int irq, void *dev_id)
1240static int fec_enet_rx_napi(struct napi_struct *napi, int budget) 1258static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
1241{ 1259{
1242 struct net_device *ndev = napi->dev; 1260 struct net_device *ndev = napi->dev;
1243 int pkts = fec_enet_rx(ndev, budget);
1244 struct fec_enet_private *fep = netdev_priv(ndev); 1261 struct fec_enet_private *fep = netdev_priv(ndev);
1262 int pkts = 0;
1245 1263
1246 fec_enet_tx(ndev); 1264 if (fep->work_rx)
1265 pkts = fec_enet_rx(ndev, budget);
1266 if (fep->work_tx)
1267 fec_enet_tx(ndev);
1247 1268
1248 if (pkts < budget) { 1269 if (pkts < budget) {
1249 napi_complete(napi); 1270 napi_complete(napi);
1250 writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); 1271 __raw_writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
1251 } 1272 }
1252 return pkts; 1273 return pkts;
1253} 1274}
@@ -2483,6 +2504,10 @@ static int fec_enet_init(struct net_device *ndev)
2483 fep->csum_flags |= FLAG_RX_CSUM_ENABLED; 2504 fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
2484 } 2505 }
2485 2506
2507 /* enable GRO in default */
2508 ndev->features |= NETIF_F_GRO;
2509 ndev->hw_features |= NETIF_F_GRO;
2510
2486 fec_restart(ndev, 0); 2511 fec_restart(ndev, 0);
2487 2512
2488 return 0; 2513 return 0;