path: root/drivers/net/tile
author     Chris Metcalf <cmetcalf@tilera.com>    2011-03-01 12:49:53 -0500
committer  Chris Metcalf <cmetcalf@tilera.com>    2011-03-10 13:18:12 -0500
commit     d91c641233ae09fcccec75313b7f55992668bf8d (patch)
tree       1ecc5d498b7cbb35c1c4474bd3c23bf8f411e010 /drivers/net/tile
parent     76c567fbba50c3da2f4d40e2e551bab26cfd4381 (diff)
tile on-chip network driver: sync up with latest fixes
Combine the "command" and "completion" locks into a single lock, on each egress queue, to improve efficiency. Support the use of 4KB pages in the "egress queue". Delete the unused "duplicate ACK compression" code. Filter "bad" (i.e. truncated) packets. Avoid corrupting "dev->napi_list", by sequentializing modifications. Deregister for incoming packets during stop, to reduce unexpected interrupts. Also, encourage active NAPI loops to complete before we disable NAPI, which would otherwise crash. Free any pending completions after shutting down LEPP. Use a single, permanently registered, IRQ handler, to avoid situations in which the IRQ handler was firing after being freed, and ignore any "unexpected" interrupts. Drop egress packets, instead of spinning, if the hardware cannot keep up, or is disconnected. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'drivers/net/tile')
-rw-r--r--  drivers/net/tile/tilepro.c  961
1 file changed, 515 insertions(+), 446 deletions(-)
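The last item in the commit message ("drop egress packets, instead of spinning") rests on the full-queue test that appears in the transmit hunks near the end of this diff (cmd_head / cmd_tail / cmd_next).  The stand-alone C model below sketches that test under an assumed ring layout; CMD_LIMIT, struct cmd_ring and ring_reserve() are illustrative names, not the driver's real LEPP types.

/*
 * Model of the "drop instead of spin" egress behaviour: if the command
 * ring cannot take a new command, report "busy" (NETDEV_TX_BUSY in the
 * driver) rather than spinning with the egress lock held.
 */
#include <stdbool.h>
#include <stdio.h>

#define CMD_LIMIT 256                     /* stand-in for LEPP_CMD_LIMIT */

struct cmd_ring {
        unsigned int head;                /* oldest command still owned by hardware */
        unsigned int tail;                /* where the next command is written */
};

/* Returns true if "size" bytes were reserved, false for "busy". */
static bool ring_reserve(struct cmd_ring *r, unsigned int size)
{
        unsigned int next = r->tail + size;

        /* Same tests as the driver's cmd_head/cmd_tail/cmd_next checks. */
        if (r->tail < r->head && next >= r->head)
                return false;             /* would overrun pending commands */
        if (next > CMD_LIMIT) {
                next = 0;                 /* wrap to the start of the ring */
                if (next == r->head)
                        return false;
        }

        r->tail = next;
        return true;
}

int main(void)
{
        struct cmd_ring r = { .head = 64, .tail = 32 };

        printf("reserve 16: %s\n", ring_reserve(&r, 16) ? "ok" : "busy");
        printf("reserve 64: %s\n", ring_reserve(&r, 64) ? "ok" : "busy");
        return 0;
}

In the driver these checks sit under the single eq_lock, and a full ring now unwinds to NETDEV_TX_BUSY instead of the old goto-spin loop.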
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c
index f9012992d21..0825db6d883 100644
--- a/drivers/net/tile/tilepro.c
+++ b/drivers/net/tile/tilepro.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved. 2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
@@ -44,10 +44,6 @@
44#include <linux/tcp.h> 44#include <linux/tcp.h>
45 45
46 46
47/* There is no singlethread_cpu, so schedule work on the current cpu. */
48#define singlethread_cpu -1
49
50
51/* 47/*
52 * First, "tile_net_init_module()" initializes all four "devices" which 48 * First, "tile_net_init_module()" initializes all four "devices" which
53 * can be used by linux. 49 * can be used by linux.
@@ -73,15 +69,16 @@
73 * return, knowing we will be called again later. Otherwise, we 69 * return, knowing we will be called again later. Otherwise, we
74 * reenable the ingress interrupt, and call "napi_complete()". 70 * reenable the ingress interrupt, and call "napi_complete()".
75 * 71 *
72 * HACK: Since disabling the ingress interrupt is not reliable, we
73 * ignore the interrupt if the global "active" flag is false.
74 *
76 * 75 *
77 * NOTE: The use of "native_driver" ensures that EPP exists, and that 76 * NOTE: The use of "native_driver" ensures that EPP exists, and that
78 * "epp_sendv" is legal, and that "LIPP" is being used. 77 * we are using "LIPP" and "LEPP".
79 * 78 *
80 * NOTE: Failing to free completions for an arbitrarily long time 79 * NOTE: Failing to free completions for an arbitrarily long time
81 * (which is defined to be illegal) does in fact cause bizarre 80 * (which is defined to be illegal) does in fact cause bizarre
82 * problems. The "egress_timer" helps prevent this from happening. 81 * problems. The "egress_timer" helps prevent this from happening.
83 *
84 * NOTE: The egress code can be interrupted by the interrupt handler.
85 */ 82 */
86 83
87 84
@@ -142,6 +139,7 @@
142MODULE_AUTHOR("Tilera"); 139MODULE_AUTHOR("Tilera");
143MODULE_LICENSE("GPL"); 140MODULE_LICENSE("GPL");
144 141
142
145/* 143/*
146 * Queue of incoming packets for a specific cpu and device. 144 * Queue of incoming packets for a specific cpu and device.
147 * 145 *
@@ -177,7 +175,7 @@ struct tile_net_cpu {
177 struct tile_netio_queue queue; 175 struct tile_netio_queue queue;
178 /* Statistics. */ 176 /* Statistics. */
179 struct tile_net_stats_t stats; 177 struct tile_net_stats_t stats;
180 /* ISSUE: Is this needed? */ 178 /* True iff NAPI is enabled. */
181 bool napi_enabled; 179 bool napi_enabled;
182 /* True if this tile has succcessfully registered with the IPP. */ 180 /* True if this tile has succcessfully registered with the IPP. */
183 bool registered; 181 bool registered;
@@ -200,20 +198,20 @@ struct tile_net_cpu {
200struct tile_net_priv { 198struct tile_net_priv {
201 /* Our network device. */ 199 /* Our network device. */
202 struct net_device *dev; 200 struct net_device *dev;
203 /* The actual egress queue. */ 201 /* Pages making up the egress queue. */
204 lepp_queue_t *epp_queue; 202 struct page *eq_pages;
205 /* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */ 203 /* Address of the actual egress queue. */
206 spinlock_t cmd_lock; 204 lepp_queue_t *eq;
207 /* Protects "epp_queue->comp_head". */ 205 /* Protects "eq". */
208 spinlock_t comp_lock; 206 spinlock_t eq_lock;
209 /* The hypervisor handle for this interface. */ 207 /* The hypervisor handle for this interface. */
210 int hv_devhdl; 208 int hv_devhdl;
211 /* The intr bit mask that IDs this device. */ 209 /* The intr bit mask that IDs this device. */
212 u32 intr_id; 210 u32 intr_id;
213 /* True iff "tile_net_open_aux()" has succeeded. */ 211 /* True iff "tile_net_open_aux()" has succeeded. */
214 int partly_opened; 212 bool partly_opened;
215 /* True iff "tile_net_open_inner()" has succeeded. */ 213 /* True iff the device is "active". */
216 int fully_opened; 214 bool active;
217 /* Effective network cpus. */ 215 /* Effective network cpus. */
218 struct cpumask network_cpus_map; 216 struct cpumask network_cpus_map;
219 /* Number of network cpus. */ 217 /* Number of network cpus. */
@@ -228,6 +226,10 @@ struct tile_net_priv {
228 struct tile_net_cpu *cpu[NR_CPUS]; 226 struct tile_net_cpu *cpu[NR_CPUS];
229}; 227};
230 228
229/* Log2 of the number of small pages needed for the egress queue. */
230#define EQ_ORDER get_order(sizeof(lepp_queue_t))
231/* Size of the egress queue's pages. */
232#define EQ_SIZE (1 << (PAGE_SHIFT + EQ_ORDER))
231 233
232/* 234/*
233 * The actual devices (xgbe0, xgbe1, gbe0, gbe1). 235 * The actual devices (xgbe0, xgbe1, gbe0, gbe1).
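The EQ_ORDER / EQ_SIZE macros added just above replace the old assumption that the egress queue fits in one page, which is what makes 4KB pages workable.  Below is a rough stand-alone model of the arithmetic, assuming 4KB pages and a made-up queue size; get_order_model() stands in for the kernel's get_order().

#include <stdio.h>

#define PAGE_SHIFT 12                           /* 4KB pages; tile can also use 64KB */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Log2 of the number of pages needed to hold "size" bytes. */
static int get_order_model(unsigned long size)
{
        int order = 0;
        unsigned long span = PAGE_SIZE;

        while (span < size) {
                span <<= 1;
                order++;
        }
        return order;
}

int main(void)
{
        unsigned long lepp_queue_bytes = 8192;  /* hypothetical sizeof(lepp_queue_t) */
        int eq_order = get_order_model(lepp_queue_bytes);
        unsigned long eq_size = 1UL << (PAGE_SHIFT + eq_order);

        printf("EQ_ORDER=%d EQ_SIZE=%lu\n", eq_order, eq_size);
        return 0;
}

With 64KB pages such a queue needs order 0 (a single page); with 4KB pages it needs order 1, which is why later hunks pass EQ_ORDER and EQ_SIZE to homecache_change_page_home() and the shim instead of a bare page and PAGE_SIZE.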
@@ -284,7 +286,11 @@ static void net_printk(char *fmt, ...)
284 */ 286 */
285static void dump_packet(unsigned char *data, unsigned long length, char *s) 287static void dump_packet(unsigned char *data, unsigned long length, char *s)
286{ 288{
289 int my_cpu = smp_processor_id();
290
287 unsigned long i; 291 unsigned long i;
292 char buf[128];
293
288 static unsigned int count; 294 static unsigned int count;
289 295
290 pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n", 296 pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n",
@@ -294,10 +300,12 @@ static void dump_packet(unsigned char *data, unsigned long length, char *s)
294 300
295 for (i = 0; i < length; i++) { 301 for (i = 0; i < length; i++) {
296 if ((i & 0xf) == 0) 302 if ((i & 0xf) == 0)
297 sprintf(buf, "%8.8lx:", i); 303 sprintf(buf, "[%02d] %8.8lx:", my_cpu, i);
298 sprintf(buf + strlen(buf), " %2.2x", data[i]); 304 sprintf(buf + strlen(buf), " %2.2x", data[i]);
299 if ((i & 0xf) == 0xf || i == length - 1) 305 if ((i & 0xf) == 0xf || i == length - 1) {
300 pr_info("%s\n", buf); 306 strcat(buf, "\n");
307 pr_info("%s", buf);
308 }
301 } 309 }
302} 310}
303#endif 311#endif
@@ -351,60 +359,109 @@ static void tile_net_provide_linux_buffer(struct tile_net_cpu *info,
351 359
352/* 360/*
353 * Provide a linux buffer for LIPP. 361 * Provide a linux buffer for LIPP.
362 *
363 * Note that the ACTUAL allocation for each buffer is a "struct sk_buff",
364 * plus a chunk of memory that includes not only the requested bytes, but
365 * also NET_SKB_PAD bytes of initial padding, and a "struct skb_shared_info".
366 *
367 * Note that "struct skb_shared_info" is 88 bytes with 64K pages and
368 * 268 bytes with 4K pages (since the frags[] array needs 18 entries).
369 *
370 * Without jumbo packets, the maximum packet size will be 1536 bytes,
371 * and we use 2 bytes (NET_IP_ALIGN) of padding. ISSUE: If we told
372 * the hardware to clip at 1518 bytes instead of 1536 bytes, then we
373 * could save an entire cache line, but in practice, we don't need it.
374 *
375 * Since CPAs are 38 bits, and we can only encode the high 31 bits in
376 * a "linux_buffer_t", the low 7 bits must be zero, and thus, we must
377 * align the actual "va" mod 128.
378 *
379 * We assume that the underlying "head" will be aligned mod 64. Note
380 * that in practice, we have seen "head" NOT aligned mod 128 even when
381 * using 2048 byte allocations, which is surprising.
382 *
383 * If "head" WAS always aligned mod 128, we could change LIPP to
384 * assume that the low SIX bits are zero, and the 7th bit is one, that
385 * is, align the actual "va" mod 128 plus 64, which would be "free".
386 *
387 * For now, the actual "head" pointer points at NET_SKB_PAD bytes of
388 * padding, plus 28 or 92 bytes of extra padding, plus the sk_buff
389 * pointer, plus the NET_IP_ALIGN padding, plus 126 or 1536 bytes for
390 * the actual packet, plus 62 bytes of empty padding, plus some
391 * padding and the "struct skb_shared_info".
392 *
393 * With 64K pages, a large buffer thus needs 32+92+4+2+1536+62+88
394 * bytes, or 1816 bytes, which fits comfortably into 2048 bytes.
395 *
396 * With 64K pages, a small buffer thus needs 32+92+4+2+126+88
397 * bytes, or 344 bytes, which means we are wasting 64+ bytes, and
398 * could presumably increase the size of small buffers.
399 *
400 * With 4K pages, a large buffer thus needs 32+92+4+2+1536+62+268
401 * bytes, or 1996 bytes, which fits comfortably into 2048 bytes.
402 *
403 * With 4K pages, a small buffer thus needs 32+92+4+2+126+268
404 * bytes, or 524 bytes, which is annoyingly wasteful.
405 *
406 * Maybe we should increase LIPP_SMALL_PACKET_SIZE to 192?
407 *
408 * ISSUE: Maybe we should increase "NET_SKB_PAD" to 64?
354 */ 409 */
355static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, 410static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
356 bool small) 411 bool small)
357{ 412{
358 /* ISSUE: What should we use here? */ 413#if TILE_NET_MTU <= 1536
414 /* Without "jumbo", 2 + 1536 should be sufficient. */
415 unsigned int large_size = NET_IP_ALIGN + 1536;
416#else
417 /* ISSUE: This has not been tested. */
359 unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100; 418 unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100;
419#endif
360 420
361 /* Round up to ensure to avoid "false sharing" with last cache line. */ 421 /* Avoid "false sharing" with last cache line. */
362 unsigned int buffer_size = 422 /* ISSUE: This is already done by "dev_alloc_skb()". */
423 unsigned int len =
363 (((small ? LIPP_SMALL_PACKET_SIZE : large_size) + 424 (((small ? LIPP_SMALL_PACKET_SIZE : large_size) +
364 CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE()); 425 CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE());
365 426
366 /* 427 unsigned int padding = 128 - NET_SKB_PAD;
367 * ISSUE: Since CPAs are 38 bits, and we can only encode the 428 unsigned int align;
368 * high 31 bits in a "linux_buffer_t", the low 7 bits must be
369 * zero, and thus, we must align the actual "va" mod 128.
370 */
371 const unsigned long align = 128;
372 429
373 struct sk_buff *skb; 430 struct sk_buff *skb;
374 void *va; 431 void *va;
375 432
376 struct sk_buff **skb_ptr; 433 struct sk_buff **skb_ptr;
377 434
378 /* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */ 435 /* Request 96 extra bytes for alignment purposes. */
379 /* and also "reserves" that many bytes. */ 436 skb = dev_alloc_skb(len + padding);
380 /* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */ 437 if (skb == NULL)
381 int len = sizeof(*skb_ptr) + align + buffer_size; 438 return false;
382
383 while (1) {
384
385 /* Allocate (or fail). */
386 skb = dev_alloc_skb(len);
387 if (skb == NULL)
388 return false;
389
390 /* Make room for a back-pointer to 'skb'. */
391 skb_reserve(skb, sizeof(*skb_ptr));
392 439
393 /* Make sure we are aligned. */ 440 /* Skip 32 or 96 bytes to align "data" mod 128. */
394 skb_reserve(skb, -(long)skb->data & (align - 1)); 441 align = -(long)skb->data & (128 - 1);
442 BUG_ON(align > padding);
443 skb_reserve(skb, align);
395 444
396 /* This address is given to IPP. */ 445 /* This address is given to IPP. */
397 va = skb->data; 446 va = skb->data;
398 447
399 if (small) 448 /* Buffers must not span a huge page. */
400 break; 449 BUG_ON(((((long)va & ~HPAGE_MASK) + len) & HPAGE_MASK) != 0);
401 450
402 /* ISSUE: This has never been observed! */ 451#ifdef TILE_NET_PARANOIA
403 /* Large buffers must not span a huge page. */ 452#if CHIP_HAS_CBOX_HOME_MAP()
404 if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0) 453 if (hash_default) {
405 break; 454 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va);
406 pr_err("Leaking unaligned linux buffer at %p.\n", va); 455 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
456 panic("Non-HFH ingress buffer! VA=%p Mode=%d PTE=%llx",
457 va, hv_pte_get_mode(pte), hv_pte_val(pte));
407 } 458 }
459#endif
460#endif
461
462 /* Invalidate the packet buffer. */
463 if (!hash_default)
464 __inv_buffer(va, len);
408 465
409 /* Skip two bytes to satisfy LIPP assumptions. */ 466 /* Skip two bytes to satisfy LIPP assumptions. */
410 /* Note that this aligns IP on a 16 byte boundary. */ 467 /* Note that this aligns IP on a 16 byte boundary. */
@@ -415,23 +472,9 @@ static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
415 skb_ptr = va - sizeof(*skb_ptr); 472 skb_ptr = va - sizeof(*skb_ptr);
416 *skb_ptr = skb; 473 *skb_ptr = skb;
417 474
418 /* Invalidate the packet buffer. */
419 if (!hash_default)
420 __inv_buffer(skb->data, buffer_size);
421
422 /* Make sure "skb_ptr" has been flushed. */ 475 /* Make sure "skb_ptr" has been flushed. */
423 __insn_mf(); 476 __insn_mf();
424 477
425#ifdef TILE_NET_PARANOIA
426#if CHIP_HAS_CBOX_HOME_MAP()
427 if (hash_default) {
428 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va);
429 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
430 panic("Non-coherent ingress buffer!");
431 }
432#endif
433#endif
434
435 /* Provide the new buffer. */ 478 /* Provide the new buffer. */
436 tile_net_provide_linux_buffer(info, va, small); 479 tile_net_provide_linux_buffer(info, va, small);
437 480
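The rewritten provisioning code above over-allocates by (128 - NET_SKB_PAD) bytes and then reserves "-(long)skb->data & (128 - 1)" so that the address handed to LIPP is aligned mod 128 (the encoded CPA must have its low 7 bits clear).  A tiny stand-alone demonstration of that arithmetic:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        static char backing[4096];
        uintptr_t p;

        /* For a few arbitrary addresses, show that skipping (-addr & 127)
         * bytes lands on the next 128-byte boundary, at a cost of at most
         * 127 bytes of padding. */
        for (p = (uintptr_t)backing; p < (uintptr_t)backing + 8; p++) {
                unsigned long skip = -(long)p & (128 - 1);
                unsigned long data = (unsigned long)p + skip;
                printf("addr=%#lx skip=%3lu -> %#lx (mod 128 = %lu)\n",
                       (unsigned long)p, skip, data, data % 128);
        }
        return 0;
}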
@@ -469,48 +512,64 @@ oops:
469 * Grab some LEPP completions, and store them in "comps", of size 512 * Grab some LEPP completions, and store them in "comps", of size
470 * "comps_size", and return the number of completions which were 513 * "comps_size", and return the number of completions which were
471 * stored, so the caller can free them. 514 * stored, so the caller can free them.
472 *
473 * If "pending" is not NULL, it will be set to true if there might
474 * still be some pending completions caused by this tile, else false.
475 */ 515 */
476static unsigned int tile_net_lepp_grab_comps(struct net_device *dev, 516static unsigned int tile_net_lepp_grab_comps(lepp_queue_t *eq,
477 struct sk_buff *comps[], 517 struct sk_buff *comps[],
478 unsigned int comps_size, 518 unsigned int comps_size,
479 bool *pending) 519 unsigned int min_size)
480{ 520{
481 struct tile_net_priv *priv = netdev_priv(dev);
482
483 lepp_queue_t *eq = priv->epp_queue;
484
485 unsigned int n = 0; 521 unsigned int n = 0;
486 522
487 unsigned int comp_head; 523 unsigned int comp_head = eq->comp_head;
488 unsigned int comp_busy; 524 unsigned int comp_busy = eq->comp_busy;
489 unsigned int comp_tail;
490
491 spin_lock(&priv->comp_lock);
492
493 comp_head = eq->comp_head;
494 comp_busy = eq->comp_busy;
495 comp_tail = eq->comp_tail;
496 525
497 while (comp_head != comp_busy && n < comps_size) { 526 while (comp_head != comp_busy && n < comps_size) {
498 comps[n++] = eq->comps[comp_head]; 527 comps[n++] = eq->comps[comp_head];
499 LEPP_QINC(comp_head); 528 LEPP_QINC(comp_head);
500 } 529 }
501 530
502 if (pending != NULL) 531 if (n < min_size)
503 *pending = (comp_head != comp_tail); 532 return 0;
504 533
505 eq->comp_head = comp_head; 534 eq->comp_head = comp_head;
506 535
507 spin_unlock(&priv->comp_lock);
508
509 return n; 536 return n;
510} 537}
511 538
512 539
513/* 540/*
541 * Free some comps, and return true iff there are still some pending.
542 */
543static bool tile_net_lepp_free_comps(struct net_device *dev, bool all)
544{
545 struct tile_net_priv *priv = netdev_priv(dev);
546
547 lepp_queue_t *eq = priv->eq;
548
549 struct sk_buff *olds[64];
550 unsigned int wanted = 64;
551 unsigned int i, n;
552 bool pending;
553
554 spin_lock(&priv->eq_lock);
555
556 if (all)
557 eq->comp_busy = eq->comp_tail;
558
559 n = tile_net_lepp_grab_comps(eq, olds, wanted, 0);
560
561 pending = (eq->comp_head != eq->comp_tail);
562
563 spin_unlock(&priv->eq_lock);
564
565 for (i = 0; i < n; i++)
566 kfree_skb(olds[i]);
567
568 return pending;
569}
570
571
572/*
514 * Make sure the egress timer is scheduled. 573 * Make sure the egress timer is scheduled.
515 * 574 *
516 * Note that we use "schedule if not scheduled" logic instead of the more 575 * Note that we use "schedule if not scheduled" logic instead of the more
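The new tile_net_lepp_grab_comps() above takes the queue itself plus a min_size, and commits nothing if fewer than min_size completions are ready; the transmit path passes min_size = 4 so it only pays for freeing skbs once a worthwhile batch has accumulated.  Here is a stand-alone model of that behaviour under an assumed ring layout (QUEUE_SLOTS, struct model_queue and qinc() are invented stand-ins for the real lepp_queue_t and LEPP_QINC()).

#include <stdio.h>

#define QUEUE_SLOTS 8

struct model_queue {
        unsigned int comp_head;        /* next completion to hand back */
        unsigned int comp_busy;        /* completions finished by "hardware" */
        unsigned int comp_tail;        /* completions still owed */
        void *comps[QUEUE_SLOTS];      /* skb pointers in the real driver */
};

static unsigned int qinc(unsigned int i)
{
        return (i + 1) % QUEUE_SLOTS;
}

static unsigned int grab_comps(struct model_queue *q, void *out[],
                               unsigned int out_size, unsigned int min_size)
{
        unsigned int head = q->comp_head;
        unsigned int n = 0;

        while (head != q->comp_busy && n < out_size) {
                out[n++] = q->comps[head];
                head = qinc(head);
        }

        if (n < min_size)
                return 0;              /* not worth it: leave the ring untouched */

        q->comp_head = head;
        return n;
}

int main(void)
{
        struct model_queue q = { .comp_busy = 3, .comp_tail = 5 };
        void *out[QUEUE_SLOTS];
        int i;

        for (i = 0; i < QUEUE_SLOTS; i++)
                q.comps[i] = &q.comps[i];

        printf("grabbed %u with min_size 4\n", grab_comps(&q, out, QUEUE_SLOTS, 4));
        printf("grabbed %u with min_size 0\n", grab_comps(&q, out, QUEUE_SLOTS, 0));
        return 0;
}

Locking moves out of this helper: in the new code the callers hold the single eq_lock (or, in tile_net_lepp_free_comps(), take it explicitly) around the call.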
@@ -544,21 +603,11 @@ static void tile_net_handle_egress_timer(unsigned long arg)
544 struct tile_net_cpu *info = (struct tile_net_cpu *)arg; 603 struct tile_net_cpu *info = (struct tile_net_cpu *)arg;
545 struct net_device *dev = info->napi.dev; 604 struct net_device *dev = info->napi.dev;
546 605
547 struct sk_buff *olds[32];
548 unsigned int wanted = 32;
549 unsigned int i, nolds = 0;
550 bool pending;
551
552 /* The timer is no longer scheduled. */ 606 /* The timer is no longer scheduled. */
553 info->egress_timer_scheduled = false; 607 info->egress_timer_scheduled = false;
554 608
555 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending); 609 /* Free comps, and reschedule timer if more are pending. */
556 610 if (tile_net_lepp_free_comps(dev, false))
557 for (i = 0; i < nolds; i++)
558 kfree_skb(olds[i]);
559
560 /* Reschedule timer if needed. */
561 if (pending)
562 tile_net_schedule_egress_timer(info); 611 tile_net_schedule_egress_timer(info);
563} 612}
564 613
@@ -636,8 +685,39 @@ static bool is_dup_ack(char *s1, char *s2, unsigned int len)
636 685
637 686
638 687
688static void tile_net_discard_aux(struct tile_net_cpu *info, int index)
689{
690 struct tile_netio_queue *queue = &info->queue;
691 netio_queue_impl_t *qsp = queue->__system_part;
692 netio_queue_user_impl_t *qup = &queue->__user_part;
693
694 int index2_aux = index + sizeof(netio_pkt_t);
695 int index2 =
696 ((index2_aux ==
697 qsp->__packet_receive_queue.__last_packet_plus_one) ?
698 0 : index2_aux);
699
700 netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index);
701
702 /* Extract the "linux_buffer_t". */
703 unsigned int buffer = pkt->__packet.word;
704
705 /* Convert "linux_buffer_t" to "va". */
706 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
707
708 /* Acquire the associated "skb". */
709 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
710 struct sk_buff *skb = *skb_ptr;
711
712 kfree_skb(skb);
713
714 /* Consume this packet. */
715 qup->__packet_receive_read = index2;
716}
717
718
639/* 719/*
640 * Like "tile_net_handle_packets()", but just discard packets. 720 * Like "tile_net_poll()", but just discard packets.
641 */ 721 */
642static void tile_net_discard_packets(struct net_device *dev) 722static void tile_net_discard_packets(struct net_device *dev)
643{ 723{
@@ -650,32 +730,8 @@ static void tile_net_discard_packets(struct net_device *dev)
650 730
651 while (qup->__packet_receive_read != 731 while (qup->__packet_receive_read !=
652 qsp->__packet_receive_queue.__packet_write) { 732 qsp->__packet_receive_queue.__packet_write) {
653
654 int index = qup->__packet_receive_read; 733 int index = qup->__packet_receive_read;
655 734 tile_net_discard_aux(info, index);
656 int index2_aux = index + sizeof(netio_pkt_t);
657 int index2 =
658 ((index2_aux ==
659 qsp->__packet_receive_queue.__last_packet_plus_one) ?
660 0 : index2_aux);
661
662 netio_pkt_t *pkt = (netio_pkt_t *)
663 ((unsigned long) &qsp[1] + index);
664
665 /* Extract the "linux_buffer_t". */
666 unsigned int buffer = pkt->__packet.word;
667
668 /* Convert "linux_buffer_t" to "va". */
669 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
670
671 /* Acquire the associated "skb". */
672 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
673 struct sk_buff *skb = *skb_ptr;
674
675 kfree_skb(skb);
676
677 /* Consume this packet. */
678 qup->__packet_receive_read = index2;
679 } 735 }
680} 736}
681 737
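Both the discard path above and the receive path recover the skb from a 32-bit "linux_buffer_t" via __va((phys_addr_t)(buffer >> 1) << 7): a 38-bit CPA whose low 7 bits are zero leaves exactly 31 interesting bits.  A stand-alone sketch of that packing follows, assuming bit 0 is an opaque per-buffer flag (the real format may assign it differently).

#include <stdio.h>
#include <stdint.h>

/* Pack a 128-byte-aligned, 38-bit physical address into one 32-bit word. */
static uint32_t encode_buffer(uint64_t cpa, unsigned int flag)
{
        return (uint32_t)((cpa >> 7) << 1) | (flag & 1);
}

/* Mirrors the driver: drop the flag bit, restore the seven low zero bits. */
static uint64_t decode_buffer(uint32_t word)
{
        return ((uint64_t)(word >> 1)) << 7;
}

int main(void)
{
        uint64_t cpa = 0x3f00001280ULL;    /* example 38-bit, 128-byte-aligned PA */
        uint32_t word = encode_buffer(cpa, 1);

        printf("cpa=%#llx word=%#x decoded=%#llx\n",
               (unsigned long long)cpa, word,
               (unsigned long long)decode_buffer(word));
        return 0;
}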
@@ -704,7 +760,8 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
704 760
705 netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt); 761 netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt);
706 762
707 /* Extract the packet size. */ 763 /* Extract the packet size. FIXME: Shouldn't the second line */
764 /* get subtracted? Mostly moot, since it should be "zero". */
708 unsigned long len = 765 unsigned long len =
709 (NETIO_PKT_CUSTOM_LENGTH(pkt) + 766 (NETIO_PKT_CUSTOM_LENGTH(pkt) +
710 NET_IP_ALIGN - NETIO_PACKET_PADDING); 767 NET_IP_ALIGN - NETIO_PACKET_PADDING);
@@ -722,15 +779,6 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
722 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ 779 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
723 unsigned char *buf = va + NET_IP_ALIGN; 780 unsigned char *buf = va + NET_IP_ALIGN;
724 781
725#ifdef IGNORE_DUP_ACKS
726
727 static int other;
728 static int final;
729 static int keep;
730 static int skip;
731
732#endif
733
734 /* Invalidate the packet buffer. */ 782 /* Invalidate the packet buffer. */
735 if (!hash_default) 783 if (!hash_default)
736 __inv_buffer(buf, len); 784 __inv_buffer(buf, len);
@@ -745,16 +793,8 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
745#ifdef TILE_NET_VERIFY_INGRESS 793#ifdef TILE_NET_VERIFY_INGRESS
746 if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) && 794 if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) &&
747 NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) { 795 NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) {
748 /* 796 /* Bug 6624: Includes UDP packets with a "zero" checksum. */
749 * FIXME: This complains about UDP packets
750 * with a "zero" checksum (bug 6624).
751 */
752#ifdef TILE_NET_PANIC_ON_BAD
753 dump_packet(buf, len, "rx");
754 panic("Bad L4 checksum.");
755#else
756 pr_warning("Bad L4 checksum on %d byte packet.\n", len); 797 pr_warning("Bad L4 checksum on %d byte packet.\n", len);
757#endif
758 } 798 }
759 if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) && 799 if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) &&
760 NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) { 800 NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) {
@@ -769,90 +809,29 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
769 } 809 }
770 break; 810 break;
771 case NETIO_PKT_STATUS_BAD: 811 case NETIO_PKT_STATUS_BAD:
772#ifdef TILE_NET_PANIC_ON_BAD 812 pr_warning("Unexpected BAD %ld byte packet.\n", len);
773 dump_packet(buf, len, "rx");
774 panic("Unexpected BAD packet.");
775#else
776 pr_warning("Unexpected BAD %d byte packet.\n", len);
777#endif
778 } 813 }
779#endif 814#endif
780 815
781 filter = 0; 816 filter = 0;
782 817
818 /* ISSUE: Filter TCP packets with "bad" checksums? */
819
783 if (!(dev->flags & IFF_UP)) { 820 if (!(dev->flags & IFF_UP)) {
784 /* Filter packets received before we're up. */ 821 /* Filter packets received before we're up. */
785 filter = 1; 822 filter = 1;
823 } else if (NETIO_PKT_STATUS_M(metadata, pkt) == NETIO_PKT_STATUS_BAD) {
824 /* Filter "truncated" packets. */
825 filter = 1;
786 } else if (!(dev->flags & IFF_PROMISC)) { 826 } else if (!(dev->flags & IFF_PROMISC)) {
787 /* 827 /* FIXME: Implement HW multicast filter. */
788 * FIXME: Implement HW multicast filter. 828 if (!is_multicast_ether_addr(buf)) {
789 */
790 if (is_unicast_ether_addr(buf)) {
791 /* Filter packets not for our address. */ 829 /* Filter packets not for our address. */
792 const u8 *mine = dev->dev_addr; 830 const u8 *mine = dev->dev_addr;
793 filter = compare_ether_addr(mine, buf); 831 filter = compare_ether_addr(mine, buf);
794 } 832 }
795 } 833 }
796 834
797#ifdef IGNORE_DUP_ACKS
798
799 if (len != 66) {
800 /* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */
801
802 other++;
803
804 } else if (index2 ==
805 qsp->__packet_receive_queue.__packet_write) {
806
807 final++;
808
809 } else {
810
811 netio_pkt_t *pkt2 = (netio_pkt_t *)
812 ((unsigned long) &qsp[1] + index2);
813
814 netio_pkt_metadata_t *metadata2 =
815 NETIO_PKT_METADATA(pkt2);
816
817 /* Extract the packet size. */
818 unsigned long len2 =
819 (NETIO_PKT_CUSTOM_LENGTH(pkt2) +
820 NET_IP_ALIGN - NETIO_PACKET_PADDING);
821
822 if (len2 == 66 &&
823 NETIO_PKT_FLOW_HASH_M(metadata, pkt) ==
824 NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) {
825
826 /* Extract the "linux_buffer_t". */
827 unsigned int buffer2 = pkt2->__packet.word;
828
829 /* Convert "linux_buffer_t" to "va". */
830 void *va2 =
831 __va((phys_addr_t)(buffer2 >> 1) << 7);
832
833 /* Extract the packet data pointer. */
834 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
835 unsigned char *buf2 = va2 + NET_IP_ALIGN;
836
837 /* Invalidate the packet buffer. */
838 if (!hash_default)
839 __inv_buffer(buf2, len2);
840
841 if (is_dup_ack(buf, buf2, len)) {
842 skip++;
843 filter = 1;
844 } else {
845 keep++;
846 }
847 }
848 }
849
850 if (net_ratelimit())
851 pr_info("Other %d Final %d Keep %d Skip %d.\n",
852 other, final, keep, skip);
853
854#endif
855
856 if (filter) { 835 if (filter) {
857 836
858 /* ISSUE: Update "drop" statistics? */ 837 /* ISSUE: Update "drop" statistics? */
@@ -877,10 +856,7 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
877 /* NOTE: This call also sets "skb->dev = dev". */ 856 /* NOTE: This call also sets "skb->dev = dev". */
878 skb->protocol = eth_type_trans(skb, dev); 857 skb->protocol = eth_type_trans(skb, dev);
879 858
880 /* ISSUE: Discard corrupt packets? */ 859 /* Avoid recomputing "good" TCP/UDP checksums. */
881 /* ISSUE: Discard packets with bad checksums? */
882
883 /* Avoid recomputing TCP/UDP checksums. */
884 if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) 860 if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt))
885 skb->ip_summed = CHECKSUM_UNNECESSARY; 861 skb->ip_summed = CHECKSUM_UNNECESSARY;
886 862
@@ -912,9 +888,14 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
912/* 888/*
913 * Handle some packets for the given device on the current CPU. 889 * Handle some packets for the given device on the current CPU.
914 * 890 *
915 * ISSUE: The "rotting packet" race condition occurs if a packet 891 * If "tile_net_stop()" is called on some other tile while this
916 * arrives after the queue appears to be empty, and before the 892 * function is running, we will return, hopefully before that
917 * hypervisor interrupt is re-enabled. 893 * other tile asks us to call "napi_disable()".
894 *
895 * The "rotting packet" race condition occurs if a packet arrives
896 * during the extremely narrow window between the queue appearing to
897 * be empty, and the ingress interrupt being re-enabled. This happens
898 * a LOT under heavy network load.
918 */ 899 */
919static int tile_net_poll(struct napi_struct *napi, int budget) 900static int tile_net_poll(struct napi_struct *napi, int budget)
920{ 901{
@@ -928,7 +909,7 @@ static int tile_net_poll(struct napi_struct *napi, int budget)
928 909
929 unsigned int work = 0; 910 unsigned int work = 0;
930 911
931 while (1) { 912 while (priv->active) {
932 int index = qup->__packet_receive_read; 913 int index = qup->__packet_receive_read;
933 if (index == qsp->__packet_receive_queue.__packet_write) 914 if (index == qsp->__packet_receive_queue.__packet_write)
934 break; 915 break;
@@ -941,19 +922,24 @@ static int tile_net_poll(struct napi_struct *napi, int budget)
941 922
942 napi_complete(&info->napi); 923 napi_complete(&info->napi);
943 924
944 /* Re-enable hypervisor interrupts. */ 925 if (!priv->active)
926 goto done;
927
928 /* Re-enable the ingress interrupt. */
945 enable_percpu_irq(priv->intr_id); 929 enable_percpu_irq(priv->intr_id);
946 930
947 /* HACK: Avoid the "rotting packet" problem. */ 931 /* HACK: Avoid the "rotting packet" problem (see above). */
948 if (qup->__packet_receive_read != 932 if (qup->__packet_receive_read !=
949 qsp->__packet_receive_queue.__packet_write) 933 qsp->__packet_receive_queue.__packet_write) {
950 napi_schedule(&info->napi); 934 /* ISSUE: Sometimes this returns zero, presumably */
951 935 /* because an interrupt was handled for this tile. */
952 /* ISSUE: Handle completions? */ 936 (void)napi_reschedule(&info->napi);
937 }
953 938
954done: 939done:
955 940
956 tile_net_provide_needed_buffers(info); 941 if (priv->active)
942 tile_net_provide_needed_buffers(info);
957 943
958 return work; 944 return work;
959} 945}
@@ -961,6 +947,12 @@ done:
961 947
962/* 948/*
963 * Handle an ingress interrupt for the given device on the current cpu. 949 * Handle an ingress interrupt for the given device on the current cpu.
950 *
951 * ISSUE: Sometimes this gets called after "disable_percpu_irq()" has
952 * been called! This is probably due to "pending hypervisor downcalls".
953 *
954 * ISSUE: Is there any race condition between the "napi_schedule()" here
955 * and the "napi_complete()" call above?
964 */ 956 */
965static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) 957static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr)
966{ 958{
@@ -969,9 +961,15 @@ static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr)
969 int my_cpu = smp_processor_id(); 961 int my_cpu = smp_processor_id();
970 struct tile_net_cpu *info = priv->cpu[my_cpu]; 962 struct tile_net_cpu *info = priv->cpu[my_cpu];
971 963
972 /* Disable hypervisor interrupt. */ 964 /* Disable the ingress interrupt. */
973 disable_percpu_irq(priv->intr_id); 965 disable_percpu_irq(priv->intr_id);
974 966
967 /* Ignore unwanted interrupts. */
968 if (!priv->active)
969 return IRQ_HANDLED;
970
971 /* ISSUE: Sometimes "info->napi_enabled" is false here. */
972
975 napi_schedule(&info->napi); 973 napi_schedule(&info->napi);
976 974
977 return IRQ_HANDLED; 975 return IRQ_HANDLED;
@@ -1005,8 +1003,7 @@ static int tile_net_open_aux(struct net_device *dev)
1005 */ 1003 */
1006 { 1004 {
1007 int epp_home = hv_lotar_to_cpu(epp_lotar); 1005 int epp_home = hv_lotar_to_cpu(epp_lotar);
1008 struct page *page = virt_to_page(priv->epp_queue); 1006 homecache_change_page_home(priv->eq_pages, EQ_ORDER, epp_home);
1009 homecache_change_page_home(page, 0, epp_home);
1010 } 1007 }
1011 1008
1012 /* 1009 /*
@@ -1015,9 +1012,9 @@ static int tile_net_open_aux(struct net_device *dev)
1015 { 1012 {
1016 netio_ipp_address_t ea = { 1013 netio_ipp_address_t ea = {
1017 .va = 0, 1014 .va = 0,
1018 .pa = __pa(priv->epp_queue), 1015 .pa = __pa(priv->eq),
1019 .pte = hv_pte(0), 1016 .pte = hv_pte(0),
1020 .size = PAGE_SIZE, 1017 .size = EQ_SIZE,
1021 }; 1018 };
1022 ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); 1019 ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar);
1023 ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); 1020 ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3);
@@ -1043,7 +1040,7 @@ static int tile_net_open_aux(struct net_device *dev)
1043 1040
1044 1041
1045/* 1042/*
1046 * Register with hypervisor on each CPU. 1043 * Register with hypervisor on the current CPU.
1047 * 1044 *
1048 * Strangely, this function does important things even if it "fails", 1045 * Strangely, this function does important things even if it "fails",
1049 * which is especially common if the link is not up yet. Hopefully 1046 * which is especially common if the link is not up yet. Hopefully
@@ -1092,7 +1089,8 @@ static void tile_net_register(void *dev_ptr)
1092 priv->cpu[my_cpu] = info; 1089 priv->cpu[my_cpu] = info;
1093 1090
1094 /* 1091 /*
1095 * Register ourselves with the IPP. 1092 * Register ourselves with LIPP. This does a lot of stuff,
1093 * including invoking the LIPP registration code.
1096 */ 1094 */
1097 ret = hv_dev_pwrite(priv->hv_devhdl, 0, 1095 ret = hv_dev_pwrite(priv->hv_devhdl, 0,
1098 (HV_VirtAddr)&config, 1096 (HV_VirtAddr)&config,
@@ -1101,8 +1099,11 @@ static void tile_net_register(void *dev_ptr)
1101 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", 1099 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
1102 ret); 1100 ret);
1103 if (ret < 0) { 1101 if (ret < 0) {
1104 printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF" 1102 if (ret != NETIO_LINK_DOWN) {
1105 " failure %d\n", ret); 1103 printk(KERN_DEBUG "hv_dev_pwrite "
1104 "NETIO_IPP_INPUT_REGISTER_OFF failure %d\n",
1105 ret);
1106 }
1106 info->link_down = (ret == NETIO_LINK_DOWN); 1107 info->link_down = (ret == NETIO_LINK_DOWN);
1107 return; 1108 return;
1108 } 1109 }
@@ -1145,15 +1146,47 @@ static void tile_net_register(void *dev_ptr)
1145 NETIO_IPP_GET_FASTIO_OFF); 1146 NETIO_IPP_GET_FASTIO_OFF);
1146 PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); 1147 PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret);
1147 1148
1148 netif_napi_add(dev, &info->napi, tile_net_poll, 64);
1149
1150 /* Now we are registered. */ 1149 /* Now we are registered. */
1151 info->registered = true; 1150 info->registered = true;
1152} 1151}
1153 1152
1154 1153
1155/* 1154/*
1156 * Unregister with hypervisor on each CPU. 1155 * Deregister with hypervisor on the current CPU.
1156 *
1157 * This simply discards all our credits, so no more packets will be
1158 * delivered to this tile. There may still be packets in our queue.
1159 *
1160 * Also, disable the ingress interrupt.
1161 */
1162static void tile_net_deregister(void *dev_ptr)
1163{
1164 struct net_device *dev = (struct net_device *)dev_ptr;
1165 struct tile_net_priv *priv = netdev_priv(dev);
1166 int my_cpu = smp_processor_id();
1167 struct tile_net_cpu *info = priv->cpu[my_cpu];
1168
1169 /* Disable the ingress interrupt. */
1170 disable_percpu_irq(priv->intr_id);
1171
1172 /* Do nothing else if not registered. */
1173 if (info == NULL || !info->registered)
1174 return;
1175
1176 {
1177 struct tile_netio_queue *queue = &info->queue;
1178 netio_queue_user_impl_t *qup = &queue->__user_part;
1179
1180 /* Discard all our credits. */
1181 __netio_fastio_return_credits(qup->__fastio_index, -1);
1182 }
1183}
1184
1185
1186/*
1187 * Unregister with hypervisor on the current CPU.
1188 *
1189 * Also, disable the ingress interrupt.
1157 */ 1190 */
1158static void tile_net_unregister(void *dev_ptr) 1191static void tile_net_unregister(void *dev_ptr)
1159{ 1192{
@@ -1162,35 +1195,23 @@ static void tile_net_unregister(void *dev_ptr)
1162 int my_cpu = smp_processor_id(); 1195 int my_cpu = smp_processor_id();
1163 struct tile_net_cpu *info = priv->cpu[my_cpu]; 1196 struct tile_net_cpu *info = priv->cpu[my_cpu];
1164 1197
1165 int ret = 0; 1198 int ret;
1166 int dummy = 0; 1199 int dummy = 0;
1167 1200
1168 /* Do nothing if never registered. */ 1201 /* Disable the ingress interrupt. */
1169 if (info == NULL) 1202 disable_percpu_irq(priv->intr_id);
1170 return;
1171 1203
1172 /* Do nothing if already unregistered. */ 1204 /* Do nothing else if not registered. */
1173 if (!info->registered) 1205 if (info == NULL || !info->registered)
1174 return; 1206 return;
1175 1207
1176 /* 1208 /* Unregister ourselves with LIPP/LEPP. */
1177 * Unregister ourselves with LIPP.
1178 */
1179 ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1209 ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1180 sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); 1210 sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF);
1181 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n", 1211 if (ret < 0)
1182 ret); 1212 panic("Failed to unregister with LIPP/LEPP!\n");
1183 if (ret < 0) {
1184 /* FIXME: Just panic? */
1185 pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF"
1186 " failure %d\n", ret);
1187 }
1188 1213
1189 /* 1214 /* Discard all packets still in our NetIO queue. */
1190 * Discard all packets still in our NetIO queue. Hopefully,
1191 * once the unregister call is complete, there will be no
1192 * packets still in flight on the IDN.
1193 */
1194 tile_net_discard_packets(dev); 1215 tile_net_discard_packets(dev);
1195 1216
1196 /* Reset state. */ 1217 /* Reset state. */
@@ -1200,11 +1221,6 @@ static void tile_net_unregister(void *dev_ptr)
1200 /* Cancel egress timer. */ 1221 /* Cancel egress timer. */
1201 del_timer(&info->egress_timer); 1222 del_timer(&info->egress_timer);
1202 info->egress_timer_scheduled = false; 1223 info->egress_timer_scheduled = false;
1203
1204 netif_napi_del(&info->napi);
1205
1206 /* Now we are unregistered. */
1207 info->registered = false;
1208} 1224}
1209 1225
1210 1226
@@ -1212,18 +1228,28 @@ static void tile_net_unregister(void *dev_ptr)
1212 * Helper function for "tile_net_stop()". 1228 * Helper function for "tile_net_stop()".
1213 * 1229 *
1214 * Also used to handle registration failure in "tile_net_open_inner()", 1230 * Also used to handle registration failure in "tile_net_open_inner()",
1215 * when "fully_opened" is known to be false, and the various extra 1231 * when the various extra steps in "tile_net_stop()" are not necessary.
1216 * steps in "tile_net_stop()" are not necessary. ISSUE: It might be
1217 * simpler if we could just call "tile_net_stop()" anyway.
1218 */ 1232 */
1219static void tile_net_stop_aux(struct net_device *dev) 1233static void tile_net_stop_aux(struct net_device *dev)
1220{ 1234{
1221 struct tile_net_priv *priv = netdev_priv(dev); 1235 struct tile_net_priv *priv = netdev_priv(dev);
1236 int i;
1222 1237
1223 int dummy = 0; 1238 int dummy = 0;
1224 1239
1225 /* Unregister all tiles, so LIPP will stop delivering packets. */ 1240 /*
1241 * Unregister all tiles, so LIPP will stop delivering packets.
1242 * Also, delete all the "napi" objects (sequentially, to protect
1243 * "dev->napi_list").
1244 */
1226 on_each_cpu(tile_net_unregister, (void *)dev, 1); 1245 on_each_cpu(tile_net_unregister, (void *)dev, 1);
1246 for_each_online_cpu(i) {
1247 struct tile_net_cpu *info = priv->cpu[i];
1248 if (info != NULL && info->registered) {
1249 netif_napi_del(&info->napi);
1250 info->registered = false;
1251 }
1252 }
1227 1253
1228 /* Stop LIPP/LEPP. */ 1254 /* Stop LIPP/LEPP. */
1229 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1255 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
@@ -1235,18 +1261,15 @@ static void tile_net_stop_aux(struct net_device *dev)
1235 1261
1236 1262
1237/* 1263/*
1238 * Disable ingress interrupts for the given device on the current cpu. 1264 * Disable NAPI for the given device on the current cpu.
1239 */ 1265 */
1240static void tile_net_disable_intr(void *dev_ptr) 1266static void tile_net_stop_disable(void *dev_ptr)
1241{ 1267{
1242 struct net_device *dev = (struct net_device *)dev_ptr; 1268 struct net_device *dev = (struct net_device *)dev_ptr;
1243 struct tile_net_priv *priv = netdev_priv(dev); 1269 struct tile_net_priv *priv = netdev_priv(dev);
1244 int my_cpu = smp_processor_id(); 1270 int my_cpu = smp_processor_id();
1245 struct tile_net_cpu *info = priv->cpu[my_cpu]; 1271 struct tile_net_cpu *info = priv->cpu[my_cpu];
1246 1272
1247 /* Disable hypervisor interrupt. */
1248 disable_percpu_irq(priv->intr_id);
1249
1250 /* Disable NAPI if needed. */ 1273 /* Disable NAPI if needed. */
1251 if (info != NULL && info->napi_enabled) { 1274 if (info != NULL && info->napi_enabled) {
1252 napi_disable(&info->napi); 1275 napi_disable(&info->napi);
@@ -1256,21 +1279,24 @@ static void tile_net_disable_intr(void *dev_ptr)
1256 1279
1257 1280
1258/* 1281/*
1259 * Enable ingress interrupts for the given device on the current cpu. 1282 * Enable NAPI and the ingress interrupt for the given device
1283 * on the current cpu.
1284 *
1285 * ISSUE: Only do this for "network cpus"?
1260 */ 1286 */
1261static void tile_net_enable_intr(void *dev_ptr) 1287static void tile_net_open_enable(void *dev_ptr)
1262{ 1288{
1263 struct net_device *dev = (struct net_device *)dev_ptr; 1289 struct net_device *dev = (struct net_device *)dev_ptr;
1264 struct tile_net_priv *priv = netdev_priv(dev); 1290 struct tile_net_priv *priv = netdev_priv(dev);
1265 int my_cpu = smp_processor_id(); 1291 int my_cpu = smp_processor_id();
1266 struct tile_net_cpu *info = priv->cpu[my_cpu]; 1292 struct tile_net_cpu *info = priv->cpu[my_cpu];
1267 1293
1268 /* Enable hypervisor interrupt. */
1269 enable_percpu_irq(priv->intr_id);
1270
1271 /* Enable NAPI. */ 1294 /* Enable NAPI. */
1272 napi_enable(&info->napi); 1295 napi_enable(&info->napi);
1273 info->napi_enabled = true; 1296 info->napi_enabled = true;
1297
1298 /* Enable the ingress interrupt. */
1299 enable_percpu_irq(priv->intr_id);
1274} 1300}
1275 1301
1276 1302
@@ -1288,8 +1314,9 @@ static int tile_net_open_inner(struct net_device *dev)
1288 int my_cpu = smp_processor_id(); 1314 int my_cpu = smp_processor_id();
1289 struct tile_net_cpu *info; 1315 struct tile_net_cpu *info;
1290 struct tile_netio_queue *queue; 1316 struct tile_netio_queue *queue;
1291 unsigned int irq; 1317 int result = 0;
1292 int i; 1318 int i;
1319 int dummy = 0;
1293 1320
1294 /* 1321 /*
1295 * First try to register just on the local CPU, and handle any 1322 * First try to register just on the local CPU, and handle any
@@ -1307,42 +1334,52 @@ static int tile_net_open_inner(struct net_device *dev)
1307 /* 1334 /*
1308 * Now register everywhere else. If any registration fails, 1335 * Now register everywhere else. If any registration fails,
1309 * even for "link down" (which might not be possible), we 1336 * even for "link down" (which might not be possible), we
1310 * clean up using "tile_net_stop_aux()". 1337 * clean up using "tile_net_stop_aux()". Also, add all the
1338 * "napi" objects (sequentially, to protect "dev->napi_list").
1339 * ISSUE: Only use "netif_napi_add()" for "network cpus"?
1311 */ 1340 */
1312 smp_call_function(tile_net_register, (void *)dev, 1); 1341 smp_call_function(tile_net_register, (void *)dev, 1);
1313 for_each_online_cpu(i) { 1342 for_each_online_cpu(i) {
1314 if (!priv->cpu[i]->registered) { 1343 struct tile_net_cpu *info = priv->cpu[i];
1315 tile_net_stop_aux(dev); 1344 if (info->registered)
1316 return -EAGAIN; 1345 netif_napi_add(dev, &info->napi, tile_net_poll, 64);
1317 } 1346 else
1347 result = -EAGAIN;
1348 }
1349 if (result != 0) {
1350 tile_net_stop_aux(dev);
1351 return result;
1318 } 1352 }
1319 1353
1320 queue = &info->queue; 1354 queue = &info->queue;
1321 1355
1322 /* 1356 if (priv->intr_id == 0) {
1323 * Set the device intr bit mask. 1357 unsigned int irq;
1324 * The tile_net_register above sets per tile __intr_id.
1325 */
1326 priv->intr_id = queue->__system_part->__intr_id;
1327 BUG_ON(!priv->intr_id);
1328
1329 /*
1330 * Register the device interrupt handler.
1331 * The __ffs() function returns the index into the interrupt handler
1332 * table from the interrupt bit mask which should have one bit
1333 * and one bit only set.
1334 */
1335 irq = __ffs(priv->intr_id);
1336 tile_irq_activate(irq, TILE_IRQ_PERCPU);
1337 BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt,
1338 0, dev->name, (void *)dev) != 0);
1339 1358
1340 /* ISSUE: How could "priv->fully_opened" ever be "true" here? */ 1359 /*
1341 1360 * Acquire the irq allocated by the hypervisor. Every
1342 if (!priv->fully_opened) { 1361 * queue gets the same irq. The "__intr_id" field is
1362 * "1 << irq", so we use "__ffs()" to extract "irq".
1363 */
1364 priv->intr_id = queue->__system_part->__intr_id;
1365 BUG_ON(priv->intr_id == 0);
1366 irq = __ffs(priv->intr_id);
1343 1367
1344 int dummy = 0; 1368 /*
1369 * Register the ingress interrupt handler for this
1370 * device, permanently.
1371 *
1372 * We used to call "free_irq()" in "tile_net_stop()",
1373 * and then re-register the handler here every time,
1374 * but that caused DNP errors in "handle_IRQ_event()"
1375 * because "desc->action" was NULL. See bug 9143.
1376 */
1377 tile_irq_activate(irq, TILE_IRQ_PERCPU);
1378 BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt,
1379 0, dev->name, (void *)dev) != 0);
1380 }
1345 1381
1382 {
1346 /* Allocate initial buffers. */ 1383 /* Allocate initial buffers. */
1347 1384
1348 int max_buffers = 1385 int max_buffers =
@@ -1359,18 +1396,21 @@ static int tile_net_open_inner(struct net_device *dev)
1359 if (info->num_needed_small_buffers != 0 || 1396 if (info->num_needed_small_buffers != 0 ||
1360 info->num_needed_large_buffers != 0) 1397 info->num_needed_large_buffers != 0)
1361 panic("Insufficient memory for buffer stack!"); 1398 panic("Insufficient memory for buffer stack!");
1399 }
1362 1400
1363 /* Start LIPP/LEPP and activate "ingress" at the shim. */ 1401 /* We are about to be active. */
1364 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1402 priv->active = true;
1365 sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0)
1366 panic("Failed to activate the LIPP Shim!\n");
1367 1403
1368 priv->fully_opened = 1; 1404 /* Make sure "active" is visible to all tiles. */
1369 } 1405 mb();
1370 1406
1371 /* On each tile, enable the hypervisor to trigger interrupts. */ 1407 /* On each tile, enable NAPI and the ingress interrupt. */
1372 /* ISSUE: Do this before starting LIPP/LEPP? */ 1408 on_each_cpu(tile_net_open_enable, (void *)dev, 1);
1373 on_each_cpu(tile_net_enable_intr, (void *)dev, 1); 1409
1410 /* Start LIPP/LEPP and activate "ingress" at the shim. */
1411 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1412 sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0)
1413 panic("Failed to activate the LIPP Shim!\n");
1374 1414
1375 /* Start our transmit queue. */ 1415 /* Start our transmit queue. */
1376 netif_start_queue(dev); 1416 netif_start_queue(dev);
@@ -1396,9 +1436,9 @@ static void tile_net_open_retry(struct work_struct *w)
1396 * ourselves to try again later; otherwise, tell Linux we now have 1436 * ourselves to try again later; otherwise, tell Linux we now have
1397 * a working link. ISSUE: What if the return value is negative? 1437 * a working link. ISSUE: What if the return value is negative?
1398 */ 1438 */
1399 if (tile_net_open_inner(priv->dev)) 1439 if (tile_net_open_inner(priv->dev) != 0)
1400 schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, 1440 schedule_delayed_work(&priv->retry_work,
1401 TILE_NET_RETRY_INTERVAL); 1441 TILE_NET_RETRY_INTERVAL);
1402 else 1442 else
1403 netif_carrier_on(priv->dev); 1443 netif_carrier_on(priv->dev);
1404} 1444}
@@ -1412,8 +1452,8 @@ static void tile_net_open_retry(struct work_struct *w)
1412 * The open entry point is called when a network interface is made 1452 * The open entry point is called when a network interface is made
1413 * active by the system (IFF_UP). At this point all resources needed 1453 * active by the system (IFF_UP). At this point all resources needed
1414 * for transmit and receive operations are allocated, the interrupt 1454 * for transmit and receive operations are allocated, the interrupt
1415 * handler is registered with the OS, the watchdog timer is started, 1455 * handler is registered with the OS (if needed), the watchdog timer
1416 * and the stack is notified that the interface is ready. 1456 * is started, and the stack is notified that the interface is ready.
1417 * 1457 *
1418 * If the actual link is not available yet, then we tell Linux that 1458 * If the actual link is not available yet, then we tell Linux that
1419 * we have no carrier, and we keep checking until the link comes up. 1459 * we have no carrier, and we keep checking until the link comes up.
@@ -1468,6 +1508,10 @@ static int tile_net_open(struct net_device *dev)
1468#endif 1508#endif
1469 1509
1470 priv->partly_opened = 1; 1510 priv->partly_opened = 1;
1511
1512 } else {
1513 /* FIXME: Is this possible? */
1514 /* printk("Already partly opened.\n"); */
1471 } 1515 }
1472 1516
1473 /* 1517 /*
@@ -1487,57 +1531,17 @@ static int tile_net_open(struct net_device *dev)
1487 * and then remember to try again later. 1531 * and then remember to try again later.
1488 */ 1532 */
1489 netif_carrier_off(dev); 1533 netif_carrier_off(dev);
1490 schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, 1534 schedule_delayed_work(&priv->retry_work, TILE_NET_RETRY_INTERVAL);
1491 TILE_NET_RETRY_INTERVAL);
1492 1535
1493 return 0; 1536 return 0;
1494} 1537}
1495 1538
1496 1539
1497/* 1540static int tile_net_drain_lipp_buffers(struct tile_net_priv *priv)
1498 * Disables a network interface.
1499 *
1500 * Returns 0, this is not allowed to fail.
1501 *
1502 * The close entry point is called when an interface is de-activated
1503 * by the OS. The hardware is still under the drivers control, but
1504 * needs to be disabled. A global MAC reset is issued to stop the
1505 * hardware, and all transmit and receive resources are freed.
1506 *
1507 * ISSUE: Can this can be called while "tile_net_poll()" is running?
1508 */
1509static int tile_net_stop(struct net_device *dev)
1510{ 1541{
1511 struct tile_net_priv *priv = netdev_priv(dev); 1542 int n = 0;
1512
1513 bool pending = true;
1514
1515 PDEBUG("tile_net_stop()\n");
1516
1517 /* ISSUE: Only needed if not yet fully open. */
1518 cancel_delayed_work_sync(&priv->retry_work);
1519
1520 /* Can't transmit any more. */
1521 netif_stop_queue(dev);
1522
1523 /*
1524 * Disable hypervisor interrupts on each tile.
1525 */
1526 on_each_cpu(tile_net_disable_intr, (void *)dev, 1);
1527
1528 /*
1529 * Unregister the interrupt handler.
1530 * The __ffs() function returns the index into the interrupt handler
1531 * table from the interrupt bit mask which should have one bit
1532 * and one bit only set.
1533 */
1534 if (priv->intr_id)
1535 free_irq(__ffs(priv->intr_id), dev);
1536
1537 /*
1538 * Drain all the LIPP buffers.
1539 */
1540 1543
1544 /* Drain all the LIPP buffers. */
1541 while (true) { 1545 while (true) {
1542 int buffer; 1546 int buffer;
1543 1547
@@ -1560,43 +1564,105 @@ static int tile_net_stop(struct net_device *dev)
1560 1564
1561 kfree_skb(skb); 1565 kfree_skb(skb);
1562 } 1566 }
1567
1568 n++;
1563 } 1569 }
1564 1570
1565 /* Stop LIPP/LEPP. */ 1571 return n;
1566 tile_net_stop_aux(dev); 1572}
1567 1573
1568 1574
1569 priv->fully_opened = 0; 1575/*
1576 * Disables a network interface.
1577 *
1578 * Returns 0, this is not allowed to fail.
1579 *
1580 * The close entry point is called when an interface is de-activated
1581 * by the OS. The hardware is still under the drivers control, but
1582 * needs to be disabled. A global MAC reset is issued to stop the
1583 * hardware, and all transmit and receive resources are freed.
1584 *
1585 * ISSUE: How closely does "netif_running(dev)" mirror "priv->active"?
1586 *
1587 * Before we are called by "__dev_close()", "netif_running()" will
1588 * have been cleared, so no NEW calls to "tile_net_poll()" will be
1589 * made by "netpoll_poll_dev()".
1590 *
1591 * Often, this can cause some tiles to still have packets in their
1592 * queues, so we must call "tile_net_discard_packets()" later.
1593 *
1594 * Note that some other tile may still be INSIDE "tile_net_poll()",
1595 * and in fact, many will be, if there is heavy network load.
1596 *
1597 * Calling "on_each_cpu(tile_net_stop_disable, (void *)dev, 1)" when
1598 * any tile is still "napi_schedule()"'d will induce a horrible crash
1599 * when "msleep()" is called. This includes tiles which are inside
1600 * "tile_net_poll()" which have not yet called "napi_complete()".
1601 *
1602 * So, we must first try to wait long enough for other tiles to finish
1603 * with any current "tile_net_poll()" call, and, hopefully, to clear
1604 * the "scheduled" flag. ISSUE: It is unclear what happens to tiles
1605 * which have called "napi_schedule()" but which had not yet tried to
1606 * call "tile_net_poll()", or which exhausted their budget inside
1607 * "tile_net_poll()" just before this function was called.
1608 */
1609static int tile_net_stop(struct net_device *dev)
1610{
1611 struct tile_net_priv *priv = netdev_priv(dev);
1612
1613 PDEBUG("tile_net_stop()\n");
1570 1614
1615 /* Start discarding packets. */
1616 priv->active = false;
1617
1618 /* Make sure "active" is visible to all tiles. */
1619 mb();
1571 1620
1572 /* 1621 /*
1573 * XXX: ISSUE: It appears that, in practice anyway, by the 1622 * On each tile, make sure no NEW packets get delivered, and
1574 * time we get here, there are no pending completions. 1623 * disable the ingress interrupt.
1624 *
1625 * Note that the ingress interrupt can fire AFTER this,
1626 * presumably due to packets which were recently delivered,
1627 * but it will have no effect.
1575 */ 1628 */
1576 while (pending) { 1629 on_each_cpu(tile_net_deregister, (void *)dev, 1);
1577 1630
1578 struct sk_buff *olds[32]; 1631 /* Optimistically drain LIPP buffers. */
1579 unsigned int wanted = 32; 1632 (void)tile_net_drain_lipp_buffers(priv);
1580 unsigned int i, nolds = 0;
1581 1633
1582 nolds = tile_net_lepp_grab_comps(dev, olds, 1634 /* ISSUE: Only needed if not yet fully open. */
1583 wanted, &pending); 1635 cancel_delayed_work_sync(&priv->retry_work);
1584 1636
1585 /* ISSUE: We have never actually seen this debug spew. */ 1637 /* Can't transmit any more. */
1586 if (nolds != 0) 1638 netif_stop_queue(dev);
1587 pr_info("During tile_net_stop(), grabbed %d comps.\n",
1588 nolds);
1589 1639
1590 for (i = 0; i < nolds; i++) 1640 /* Disable NAPI on each tile. */
1591 kfree_skb(olds[i]); 1641 on_each_cpu(tile_net_stop_disable, (void *)dev, 1);
1592 } 1642
1643 /*
1644 * Drain any remaining LIPP buffers. NOTE: This "printk()"
1645 * has never been observed, but in theory it could happen.
1646 */
1647 if (tile_net_drain_lipp_buffers(priv) != 0)
1648 printk("Had to drain some extra LIPP buffers!\n");
1593 1649
1650 /* Stop LIPP/LEPP. */
1651 tile_net_stop_aux(dev);
1652
1653 /*
1654 * ISSUE: It appears that, in practice anyway, by the time we
1655 * get here, there are no pending completions, but just in case,
1656 * we free (all of) them anyway.
1657 */
1658 while (tile_net_lepp_free_comps(dev, true))
1659 /* loop */;
1594 1660
1595 /* Wipe the EPP queue. */ 1661 /* Wipe the EPP queue. */
1596 memset(priv->epp_queue, 0, sizeof(lepp_queue_t)); 1662 memset(priv->eq, 0, sizeof(lepp_queue_t));
1597 1663
1598 /* Evict the EPP queue. */ 1664 /* Evict the EPP queue. */
1599 finv_buffer(priv->epp_queue, PAGE_SIZE); 1665 finv_buffer(priv->eq, EQ_SIZE);
1600 1666
1601 return 0; 1667 return 0;
1602} 1668}
@@ -1742,17 +1808,15 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
1742 1808
1743 unsigned long irqflags; 1809 unsigned long irqflags;
1744 1810
1745 lepp_queue_t *eq = priv->epp_queue; 1811 lepp_queue_t *eq = priv->eq;
1746 1812
1747 struct sk_buff *olds[4]; 1813 struct sk_buff *olds[8];
1748 unsigned int wanted = 4; 1814 unsigned int wanted = 8;
1749 unsigned int i, nolds = 0; 1815 unsigned int i, nolds = 0;
1750 1816
1751 unsigned int cmd_head, cmd_tail, cmd_next; 1817 unsigned int cmd_head, cmd_tail, cmd_next;
1752 unsigned int comp_tail; 1818 unsigned int comp_tail;
1753 1819
1754 unsigned int free_slots;
1755
1756 1820
1757 /* Paranoia. */ 1821 /* Paranoia. */
1758 BUG_ON(skb->protocol != htons(ETH_P_IP)); 1822 BUG_ON(skb->protocol != htons(ETH_P_IP));
@@ -1780,34 +1844,32 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
1780 1844
1781 /* Enqueue the command. */ 1845 /* Enqueue the command. */
1782 1846
1783 spin_lock_irqsave(&priv->cmd_lock, irqflags); 1847 spin_lock_irqsave(&priv->eq_lock, irqflags);
1784 1848
1785 /* 1849 /*
1786 * Handle completions if needed to make room. 1850 * Handle completions if needed to make room.
1787 * HACK: Spin until there is sufficient room. 1851 * HACK: Spin until there is sufficient room.
1788 */ 1852 */
1789 free_slots = lepp_num_free_comp_slots(eq); 1853 if (lepp_num_free_comp_slots(eq) == 0) {
1790 if (free_slots < 1) { 1854 nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0);
1791spin: 1855 if (nolds == 0) {
1792 nolds += tile_net_lepp_grab_comps(dev, olds + nolds, 1856busy:
1793 wanted - nolds, NULL); 1857 spin_unlock_irqrestore(&priv->eq_lock, irqflags);
1794 if (lepp_num_free_comp_slots(eq) < 1) 1858 return NETDEV_TX_BUSY;
1795 goto spin; 1859 }
1796 } 1860 }
1797 1861
1798 cmd_head = eq->cmd_head; 1862 cmd_head = eq->cmd_head;
1799 cmd_tail = eq->cmd_tail; 1863 cmd_tail = eq->cmd_tail;
1800 1864
1801 /* NOTE: The "gotos" below are untested. */
1802
1803 /* Prepare to advance, detecting full queue. */ 1865 /* Prepare to advance, detecting full queue. */
1804 cmd_next = cmd_tail + cmd_size; 1866 cmd_next = cmd_tail + cmd_size;
1805 if (cmd_tail < cmd_head && cmd_next >= cmd_head) 1867 if (cmd_tail < cmd_head && cmd_next >= cmd_head)
1806 goto spin; 1868 goto busy;
1807 if (cmd_next > LEPP_CMD_LIMIT) { 1869 if (cmd_next > LEPP_CMD_LIMIT) {
1808 cmd_next = 0; 1870 cmd_next = 0;
1809 if (cmd_next == cmd_head) 1871 if (cmd_next == cmd_head)
1810 goto spin; 1872 goto busy;
1811 } 1873 }
1812 1874
1813 /* Copy the command. */ 1875 /* Copy the command. */
@@ -1823,14 +1885,18 @@ spin:
1823 eq->comp_tail = comp_tail; 1885 eq->comp_tail = comp_tail;
1824 1886
1825 /* Flush before allowing LEPP to handle the command. */ 1887 /* Flush before allowing LEPP to handle the command. */
1888 /* ISSUE: Is this the optimal location for the flush? */
1826 __insn_mf(); 1889 __insn_mf();
1827 1890
1828 eq->cmd_tail = cmd_tail; 1891 eq->cmd_tail = cmd_tail;
1829 1892
1830 spin_unlock_irqrestore(&priv->cmd_lock, irqflags); 1893 /* NOTE: Using "4" here is more efficient than "0" or "2", */
1831 1894 /* and, strangely, more efficient than pre-checking the number */
1895 /* of available completions, and comparing it to 4. */
1832 if (nolds == 0) 1896 if (nolds == 0)
1833 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); 1897 nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 4);
1898
1899 spin_unlock_irqrestore(&priv->eq_lock, irqflags);
1834 1900
1835 /* Handle completions. */ 1901 /* Handle completions. */
1836 for (i = 0; i < nolds; i++) 1902 for (i = 0; i < nolds; i++)
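
In the new enqueue path, cmd_head and cmd_tail are byte offsets into the command area, and a full ring now results in "goto busy" (drop the lock, return NETDEV_TX_BUSY) rather than spinning. A minimal user-space sketch of the same full-detection arithmetic follows; the limit constant is illustrative, not the driver's LEPP_CMD_LIMIT.

#include <stdbool.h>
#include <stdio.h>

#define CMD_LIMIT 256   /* illustrative stand-in for LEPP_CMD_LIMIT */

/*
 * Try to reserve 'cmd_size' bytes after 'tail'.  Returns false ("queue
 * full") when the new tail would collide with 'head', either directly
 * or after wrapping back to offset 0.
 */
static bool ring_reserve(unsigned int head, unsigned int tail,
                         unsigned int cmd_size, unsigned int *next)
{
        unsigned int n = tail + cmd_size;

        if (tail < head && n >= head)
                return false;           /* would overrun the consumer */
        if (n > CMD_LIMIT) {
                n = 0;                  /* wrap to the start */
                if (n == head)
                        return false;   /* consumer still at offset 0 */
        }
        *next = n;
        return true;
}

int main(void)
{
        unsigned int next;

        /* Consumer at 0, producer at 64: plenty of room. */
        printf("%d\n", ring_reserve(0, 64, 32, &next));         /* 1 */
        /* Producer has wrapped and sits just below the consumer. */
        printf("%d\n", ring_reserve(96, 64, 32, &next));        /* 0 */
        return 0;
}

Where this sketch returns false, the driver takes the "busy" label: it releases eq_lock and reports NETDEV_TX_BUSY so the stack can retry the skb later instead of burning cycles with interrupts disabled.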
@@ -1870,10 +1936,10 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1870 1936
1871 unsigned int num_frags; 1937 unsigned int num_frags;
1872 1938
1873 lepp_queue_t *eq = priv->epp_queue; 1939 lepp_queue_t *eq = priv->eq;
1874 1940
1875 struct sk_buff *olds[4]; 1941 struct sk_buff *olds[8];
1876 unsigned int wanted = 4; 1942 unsigned int wanted = 8;
1877 unsigned int i, nolds = 0; 1943 unsigned int i, nolds = 0;
1878 1944
1879 unsigned int cmd_size = sizeof(lepp_cmd_t); 1945 unsigned int cmd_size = sizeof(lepp_cmd_t);
@@ -1883,8 +1949,6 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1883 1949
1884 lepp_cmd_t cmds[LEPP_MAX_FRAGS]; 1950 lepp_cmd_t cmds[LEPP_MAX_FRAGS];
1885 1951
1886 unsigned int free_slots;
1887
1888 1952
1889 /* 1953 /*
1890 * This is paranoia, since we think that if the link doesn't come 1954 * This is paranoia, since we think that if the link doesn't come
@@ -1905,7 +1969,8 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1905 if (hash_default) { 1969 if (hash_default) {
1906 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data); 1970 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data);
1907 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) 1971 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
1908 panic("Non-coherent egress buffer!"); 1972 panic("Non-HFH egress buffer! VA=%p Mode=%d PTE=%llx",
1973 data, hv_pte_get_mode(pte), hv_pte_val(pte));
1909 } 1974 }
1910#endif 1975#endif
1911#endif 1976#endif
@@ -1958,37 +2023,35 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1958 2023
1959 /* Enqueue the commands. */ 2024 /* Enqueue the commands. */
1960 2025
1961 spin_lock_irqsave(&priv->cmd_lock, irqflags); 2026 spin_lock_irqsave(&priv->eq_lock, irqflags);
1962 2027
1963 /* 2028 /*
1964 * Handle completions if needed to make room. 2029 * Handle completions if needed to make room.
1965 * HACK: Spin until there is sufficient room. 2030 * HACK: Spin until there is sufficient room.
1966 */ 2031 */
1967 free_slots = lepp_num_free_comp_slots(eq); 2032 if (lepp_num_free_comp_slots(eq) == 0) {
1968 if (free_slots < 1) { 2033 nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0);
1969spin: 2034 if (nolds == 0) {
1970 nolds += tile_net_lepp_grab_comps(dev, olds + nolds, 2035busy:
1971 wanted - nolds, NULL); 2036 spin_unlock_irqrestore(&priv->eq_lock, irqflags);
1972 if (lepp_num_free_comp_slots(eq) < 1) 2037 return NETDEV_TX_BUSY;
1973 goto spin; 2038 }
1974 } 2039 }
1975 2040
1976 cmd_head = eq->cmd_head; 2041 cmd_head = eq->cmd_head;
1977 cmd_tail = eq->cmd_tail; 2042 cmd_tail = eq->cmd_tail;
1978 2043
1979 /* NOTE: The "gotos" below are untested. */
1980
1981 /* Copy the commands, or fail. */ 2044 /* Copy the commands, or fail. */
1982 for (i = 0; i < num_frags; i++) { 2045 for (i = 0; i < num_frags; i++) {
1983 2046
1984 /* Prepare to advance, detecting full queue. */ 2047 /* Prepare to advance, detecting full queue. */
1985 cmd_next = cmd_tail + cmd_size; 2048 cmd_next = cmd_tail + cmd_size;
1986 if (cmd_tail < cmd_head && cmd_next >= cmd_head) 2049 if (cmd_tail < cmd_head && cmd_next >= cmd_head)
1987 goto spin; 2050 goto busy;
1988 if (cmd_next > LEPP_CMD_LIMIT) { 2051 if (cmd_next > LEPP_CMD_LIMIT) {
1989 cmd_next = 0; 2052 cmd_next = 0;
1990 if (cmd_next == cmd_head) 2053 if (cmd_next == cmd_head)
1991 goto spin; 2054 goto busy;
1992 } 2055 }
1993 2056
1994 /* Copy the command. */ 2057 /* Copy the command. */
@@ -2005,14 +2068,18 @@ spin:
2005 eq->comp_tail = comp_tail; 2068 eq->comp_tail = comp_tail;
2006 2069
2007 /* Flush before allowing LEPP to handle the command. */ 2070 /* Flush before allowing LEPP to handle the command. */
2071 /* ISSUE: Is this the optimal location for the flush? */
2008 __insn_mf(); 2072 __insn_mf();
2009 2073
2010 eq->cmd_tail = cmd_tail; 2074 eq->cmd_tail = cmd_tail;
2011 2075
2012 spin_unlock_irqrestore(&priv->cmd_lock, irqflags); 2076 /* NOTE: Using "4" here is more efficient than "0" or "2", */
2013 2077 /* and, strangely, more efficient than pre-checking the number */
2078 /* of available completions, and comparing it to 4. */
2014 if (nolds == 0) 2079 if (nolds == 0)
2015 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); 2080 nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 4);
2081
2082 spin_unlock_irqrestore(&priv->eq_lock, irqflags);
2016 2083
2017 /* Handle completions. */ 2084 /* Handle completions. */
2018 for (i = 0; i < nolds; i++) 2085 for (i = 0; i < nolds; i++)
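
Both transmit paths now reap completions with a final argument of 4, and the comment above notes that this batching beats both eager reaping and a pre-check. The sketch below models one plausible reading of that argument, "return nothing unless at least 'min' completions are pending"; the exact semantics of tile_net_lepp_grab_comps() are inferred from the call sites here, not quoted from its definition.

#include <stdio.h>

#define RING_SIZE 64

struct comp_ring {
        void *comps[RING_SIZE];
        unsigned int head;      /* next entry to reap */
        unsigned int busy;      /* first entry still owned by the hardware */
};

/*
 * Reap up to 'wanted' completed buffers, but only if at least 'min'
 * are available; otherwise leave them for a later, larger batch.
 */
static unsigned int grab_comps(struct comp_ring *r, void *olds[],
                               unsigned int wanted, unsigned int min)
{
        unsigned int head = r->head;
        unsigned int n = 0;

        while (n < wanted && head != r->busy) {
                olds[n++] = r->comps[head];
                head = (head + 1) % RING_SIZE;
        }
        if (n < min)
                return 0;       /* too few: skip the tiny batch */
        r->head = head;         /* commit only when the batch is worth it */
        return n;
}

int main(void)
{
        struct comp_ring r = { .head = 0, .busy = 3 };
        void *olds[8];

        /* Only 3 pending but 'min' is 4: nothing is reaped yet. */
        printf("%u\n", grab_comps(&r, olds, 8, 4));
        r.busy = 6;
        /* Now 6 pending: the whole batch comes back at once. */
        printf("%u\n", grab_comps(&r, olds, 8, 4));
        return 0;
}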
@@ -2261,7 +2328,6 @@ static struct net_device *tile_net_dev_init(const char *name)
2261 int ret; 2328 int ret;
2262 struct net_device *dev; 2329 struct net_device *dev;
2263 struct tile_net_priv *priv; 2330 struct tile_net_priv *priv;
2264 struct page *page;
2265 2331
2266 /* 2332 /*
2267 * Allocate the device structure. This allocates "priv", calls 2333 * Allocate the device structure. This allocates "priv", calls
@@ -2285,23 +2351,21 @@ static struct net_device *tile_net_dev_init(const char *name)
2285 2351
2286 INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry); 2352 INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry);
2287 2353
2288 spin_lock_init(&priv->cmd_lock); 2354 spin_lock_init(&priv->eq_lock);
2289 spin_lock_init(&priv->comp_lock);
2290 2355
2291 /* Allocate "epp_queue". */ 2356 /* Allocate "eq". */
2292 BUG_ON(get_order(sizeof(lepp_queue_t)) != 0); 2357 priv->eq_pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, EQ_ORDER);
2293 page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); 2358 if (!priv->eq_pages) {
2294 if (!page) {
2295 free_netdev(dev); 2359 free_netdev(dev);
2296 return NULL; 2360 return NULL;
2297 } 2361 }
2298 priv->epp_queue = page_address(page); 2362 priv->eq = page_address(priv->eq_pages);
2299 2363
2300 /* Register the network device. */ 2364 /* Register the network device. */
2301 ret = register_netdev(dev); 2365 ret = register_netdev(dev);
2302 if (ret) { 2366 if (ret) {
2303 pr_err("register_netdev %s failed %d\n", dev->name, ret); 2367 pr_err("register_netdev %s failed %d\n", dev->name, ret);
2304 free_page((unsigned long)priv->epp_queue); 2368 __free_pages(priv->eq_pages, EQ_ORDER);
2305 free_netdev(dev); 2369 free_netdev(dev);
2306 return NULL; 2370 return NULL;
2307 } 2371 }
@@ -2310,7 +2374,7 @@ static struct net_device *tile_net_dev_init(const char *name)
2310 ret = tile_net_get_mac(dev); 2374 ret = tile_net_get_mac(dev);
2311 if (ret < 0) { 2375 if (ret < 0) {
2312 unregister_netdev(dev); 2376 unregister_netdev(dev);
2313 free_page((unsigned long)priv->epp_queue); 2377 __free_pages(priv->eq_pages, EQ_ORDER);
2314 free_netdev(dev); 2378 free_netdev(dev);
2315 return NULL; 2379 return NULL;
2316 } 2380 }
@@ -2321,6 +2385,9 @@ static struct net_device *tile_net_dev_init(const char *name)
2321 2385
2322/* 2386/*
2323 * Module cleanup. 2387 * Module cleanup.
2388 *
2389 * FIXME: If compiled as a module, this module cannot be "unloaded",
2390 * because the "ingress interrupt handler" is registered permanently.
2324 */ 2391 */
2325static void tile_net_cleanup(void) 2392static void tile_net_cleanup(void)
2326{ 2393{
@@ -2331,8 +2398,8 @@ static void tile_net_cleanup(void)
2331 struct net_device *dev = tile_net_devs[i]; 2398 struct net_device *dev = tile_net_devs[i];
2332 struct tile_net_priv *priv = netdev_priv(dev); 2399 struct tile_net_priv *priv = netdev_priv(dev);
2333 unregister_netdev(dev); 2400 unregister_netdev(dev);
2334 finv_buffer(priv->epp_queue, PAGE_SIZE); 2401 finv_buffer(priv->eq, EQ_SIZE);
2335 free_page((unsigned long)priv->epp_queue); 2402 __free_pages(priv->eq_pages, EQ_ORDER);
2336 free_netdev(dev); 2403 free_netdev(dev);
2337 } 2404 }
2338 } 2405 }
@@ -2355,7 +2422,12 @@ static int tile_net_init_module(void)
2355} 2422}
2356 2423
2357 2424
2425module_init(tile_net_init_module);
2426module_exit(tile_net_cleanup);
2427
2428
2358#ifndef MODULE 2429#ifndef MODULE
2430
2359/* 2431/*
2360 * The "network_cpus" boot argument specifies the cpus that are dedicated 2432 * The "network_cpus" boot argument specifies the cpus that are dedicated
2361 * to handle ingress packets. 2433 * to handle ingress packets.
@@ -2391,8 +2463,5 @@ static int __init network_cpus_setup(char *str)
2391 return 0; 2463 return 0;
2392} 2464}
2393__setup("network_cpus=", network_cpus_setup); 2465__setup("network_cpus=", network_cpus_setup);
2394#endif
2395
2396 2466
2397module_init(tile_net_init_module); 2467#endif
2398module_exit(tile_net_cleanup);
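
For comparison with "network_cpus=" above, here is a minimal __setup() boot-argument handler; the option name and the flag it sets are invented for the example. Like the driver's handler, it only makes sense in the built-in (#ifndef MODULE) case, since __setup() options come from the kernel command line.

#include <linux/init.h>
#include <linux/types.h>

static bool my_feature_enabled;

static int __init my_feature_setup(char *str)
{
        /* Kernel command line: "my_feature=1" enables the feature. */
        if (str && str[0] == '1')
                my_feature_enabled = true;
        return 1;               /* non-zero: option consumed */
}
__setup("my_feature=", my_feature_setup);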