diff options
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/tile/tilepro.c | 965 |
1 files changed, 517 insertions, 448 deletions
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c index 7cb301da7474..0825db6d883f 100644 --- a/drivers/net/tile/tilepro.c +++ b/drivers/net/tile/tilepro.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | 2 | * Copyright 2011 Tilera Corporation. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public License | 5 | * modify it under the terms of the GNU General Public License |
@@ -44,10 +44,6 @@ | |||
44 | #include <linux/tcp.h> | 44 | #include <linux/tcp.h> |
45 | 45 | ||
46 | 46 | ||
47 | /* There is no singlethread_cpu, so schedule work on the current cpu. */ | ||
48 | #define singlethread_cpu -1 | ||
49 | |||
50 | |||
51 | /* | 47 | /* |
52 | * First, "tile_net_init_module()" initializes all four "devices" which | 48 | * First, "tile_net_init_module()" initializes all four "devices" which |
53 | * can be used by linux. | 49 | * can be used by linux. |
@@ -73,15 +69,16 @@ | |||
73 | * return, knowing we will be called again later. Otherwise, we | 69 | * return, knowing we will be called again later. Otherwise, we |
74 | * reenable the ingress interrupt, and call "napi_complete()". | 70 | * reenable the ingress interrupt, and call "napi_complete()". |
75 | * | 71 | * |
72 | * HACK: Since disabling the ingress interrupt is not reliable, we | ||
73 | * ignore the interrupt if the global "active" flag is false. | ||
74 | * | ||
76 | * | 75 | * |
77 | * NOTE: The use of "native_driver" ensures that EPP exists, and that | 76 | * NOTE: The use of "native_driver" ensures that EPP exists, and that |
78 | * "epp_sendv" is legal, and that "LIPP" is being used. | 77 | * we are using "LIPP" and "LEPP". |
79 | * | 78 | * |
80 | * NOTE: Failing to free completions for an arbitrarily long time | 79 | * NOTE: Failing to free completions for an arbitrarily long time |
81 | * (which is defined to be illegal) does in fact cause bizarre | 80 | * (which is defined to be illegal) does in fact cause bizarre |
82 | * problems. The "egress_timer" helps prevent this from happening. | 81 | * problems. The "egress_timer" helps prevent this from happening. |
83 | * | ||
84 | * NOTE: The egress code can be interrupted by the interrupt handler. | ||
85 | */ | 82 | */ |
86 | 83 | ||
87 | 84 | ||
@@ -142,6 +139,7 @@ | |||
142 | MODULE_AUTHOR("Tilera"); | 139 | MODULE_AUTHOR("Tilera"); |
143 | MODULE_LICENSE("GPL"); | 140 | MODULE_LICENSE("GPL"); |
144 | 141 | ||
142 | |||
145 | /* | 143 | /* |
146 | * Queue of incoming packets for a specific cpu and device. | 144 | * Queue of incoming packets for a specific cpu and device. |
147 | * | 145 | * |
@@ -177,7 +175,7 @@ struct tile_net_cpu { | |||
177 | struct tile_netio_queue queue; | 175 | struct tile_netio_queue queue; |
178 | /* Statistics. */ | 176 | /* Statistics. */ |
179 | struct tile_net_stats_t stats; | 177 | struct tile_net_stats_t stats; |
180 | /* ISSUE: Is this needed? */ | 178 | /* True iff NAPI is enabled. */ |
181 | bool napi_enabled; | 179 | bool napi_enabled; |
182 | /* True if this tile has successfully registered with the IPP. */ | 180 | /* True if this tile has successfully registered with the IPP. */ |
183 | bool registered; | 181 | bool registered; |
@@ -200,20 +198,20 @@ struct tile_net_cpu { | |||
200 | struct tile_net_priv { | 198 | struct tile_net_priv { |
201 | /* Our network device. */ | 199 | /* Our network device. */ |
202 | struct net_device *dev; | 200 | struct net_device *dev; |
203 | /* The actual egress queue. */ | 201 | /* Pages making up the egress queue. */ |
204 | lepp_queue_t *epp_queue; | 202 | struct page *eq_pages; |
205 | /* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */ | 203 | /* Address of the actual egress queue. */ |
206 | spinlock_t cmd_lock; | 204 | lepp_queue_t *eq; |
207 | /* Protects "epp_queue->comp_head". */ | 205 | /* Protects "eq". */ |
208 | spinlock_t comp_lock; | 206 | spinlock_t eq_lock; |
209 | /* The hypervisor handle for this interface. */ | 207 | /* The hypervisor handle for this interface. */ |
210 | int hv_devhdl; | 208 | int hv_devhdl; |
211 | /* The intr bit mask that IDs this device. */ | 209 | /* The intr bit mask that IDs this device. */ |
212 | u32 intr_id; | 210 | u32 intr_id; |
213 | /* True iff "tile_net_open_aux()" has succeeded. */ | 211 | /* True iff "tile_net_open_aux()" has succeeded. */ |
214 | int partly_opened; | 212 | bool partly_opened; |
215 | /* True iff "tile_net_open_inner()" has succeeded. */ | 213 | /* True iff the device is "active". */ |
216 | int fully_opened; | 214 | bool active; |
217 | /* Effective network cpus. */ | 215 | /* Effective network cpus. */ |
218 | struct cpumask network_cpus_map; | 216 | struct cpumask network_cpus_map; |
219 | /* Number of network cpus. */ | 217 | /* Number of network cpus. */ |
@@ -228,6 +226,10 @@ struct tile_net_priv { | |||
228 | struct tile_net_cpu *cpu[NR_CPUS]; | 226 | struct tile_net_cpu *cpu[NR_CPUS]; |
229 | }; | 227 | }; |
230 | 228 | ||
229 | /* Log2 of the number of small pages needed for the egress queue. */ | ||
230 | #define EQ_ORDER get_order(sizeof(lepp_queue_t)) | ||
231 | /* Size of the egress queue's pages. */ | ||
232 | #define EQ_SIZE (1 << (PAGE_SHIFT + EQ_ORDER)) | ||
231 | 233 | ||
232 | /* | 234 | /* |
233 | * The actual devices (xgbe0, xgbe1, gbe0, gbe1). | 235 | * The actual devices (xgbe0, xgbe1, gbe0, gbe1). |
@@ -284,7 +286,11 @@ static void net_printk(char *fmt, ...) | |||
284 | */ | 286 | */ |
285 | static void dump_packet(unsigned char *data, unsigned long length, char *s) | 287 | static void dump_packet(unsigned char *data, unsigned long length, char *s) |
286 | { | 288 | { |
289 | int my_cpu = smp_processor_id(); | ||
290 | |||
287 | unsigned long i; | 291 | unsigned long i; |
292 | char buf[128]; | ||
293 | |||
288 | static unsigned int count; | 294 | static unsigned int count; |
289 | 295 | ||
290 | pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n", | 296 | pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n", |
@@ -294,10 +300,12 @@ static void dump_packet(unsigned char *data, unsigned long length, char *s) | |||
294 | 300 | ||
295 | for (i = 0; i < length; i++) { | 301 | for (i = 0; i < length; i++) { |
296 | if ((i & 0xf) == 0) | 302 | if ((i & 0xf) == 0) |
297 | sprintf(buf, "%8.8lx:", i); | 303 | sprintf(buf, "[%02d] %8.8lx:", my_cpu, i); |
298 | sprintf(buf + strlen(buf), " %2.2x", data[i]); | 304 | sprintf(buf + strlen(buf), " %2.2x", data[i]); |
299 | if ((i & 0xf) == 0xf || i == length - 1) | 305 | if ((i & 0xf) == 0xf || i == length - 1) { |
300 | pr_info("%s\n", buf); | 306 | strcat(buf, "\n"); |
307 | pr_info("%s", buf); | ||
308 | } | ||
301 | } | 309 | } |
302 | } | 310 | } |
303 | #endif | 311 | #endif |
@@ -351,60 +359,109 @@ static void tile_net_provide_linux_buffer(struct tile_net_cpu *info, | |||
351 | 359 | ||
352 | /* | 360 | /* |
353 | * Provide a linux buffer for LIPP. | 361 | * Provide a linux buffer for LIPP. |
362 | * | ||
363 | * Note that the ACTUAL allocation for each buffer is a "struct sk_buff", | ||
364 | * plus a chunk of memory that includes not only the requested bytes, but | ||
365 | * also NET_SKB_PAD bytes of initial padding, and a "struct skb_shared_info". | ||
366 | * | ||
367 | * Note that "struct skb_shared_info" is 88 bytes with 64K pages and | ||
368 | * 268 bytes with 4K pages (since the frags[] array needs 18 entries). | ||
369 | * | ||
370 | * Without jumbo packets, the maximum packet size will be 1536 bytes, | ||
371 | * and we use 2 bytes (NET_IP_ALIGN) of padding. ISSUE: If we told | ||
372 | * the hardware to clip at 1518 bytes instead of 1536 bytes, then we | ||
373 | * could save an entire cache line, but in practice, we don't need it. | ||
374 | * | ||
375 | * Since CPAs are 38 bits, and we can only encode the high 31 bits in | ||
376 | * a "linux_buffer_t", the low 7 bits must be zero, and thus, we must | ||
377 | * align the actual "va" mod 128. | ||
378 | * | ||
379 | * We assume that the underlying "head" will be aligned mod 64. Note | ||
380 | * that in practice, we have seen "head" NOT aligned mod 128 even when | ||
381 | * using 2048 byte allocations, which is surprising. | ||
382 | * | ||
383 | * If "head" WAS always aligned mod 128, we could change LIPP to | ||
384 | * assume that the low SIX bits are zero, and the 7th bit is one, that | ||
385 | * is, align the actual "va" mod 128 plus 64, which would be "free". | ||
386 | * | ||
387 | * For now, the actual "head" pointer points at NET_SKB_PAD bytes of | ||
388 | * padding, plus 28 or 92 bytes of extra padding, plus the sk_buff | ||
389 | * pointer, plus the NET_IP_ALIGN padding, plus 126 or 1536 bytes for | ||
390 | * the actual packet, plus 62 bytes of empty padding, plus some | ||
391 | * padding and the "struct skb_shared_info". | ||
392 | * | ||
393 | * With 64K pages, a large buffer thus needs 32+92+4+2+1536+62+88 | ||
394 | * bytes, or 1816 bytes, which fits comfortably into 2048 bytes. | ||
395 | * | ||
396 | * With 64K pages, a small buffer thus needs 32+92+4+2+126+88 | ||
397 | * bytes, or 344 bytes, which means we are wasting 64+ bytes, and | ||
398 | * could presumably increase the size of small buffers. | ||
399 | * | ||
400 | * With 4K pages, a large buffer thus needs 32+92+4+2+1536+62+268 | ||
401 | * bytes, or 1996 bytes, which fits comfortably into 2048 bytes. | ||
402 | * | ||
403 | * With 4K pages, a small buffer thus needs 32+92+4+2+126+268 | ||
404 | * bytes, or 524 bytes, which is annoyingly wasteful. | ||
405 | * | ||
406 | * Maybe we should increase LIPP_SMALL_PACKET_SIZE to 192? | ||
407 | * | ||
408 | * ISSUE: Maybe we should increase "NET_SKB_PAD" to 64? | ||
354 | */ | 409 | */ |
355 | static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, | 410 | static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, |
356 | bool small) | 411 | bool small) |
357 | { | 412 | { |
358 | /* ISSUE: What should we use here? */ | 413 | #if TILE_NET_MTU <= 1536 |
414 | /* Without "jumbo", 2 + 1536 should be sufficient. */ | ||
415 | unsigned int large_size = NET_IP_ALIGN + 1536; | ||
416 | #else | ||
417 | /* ISSUE: This has not been tested. */ | ||
359 | unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100; | 418 | unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100; |
419 | #endif | ||
360 | 420 | ||
361 | /* Round up to ensure to avoid "false sharing" with last cache line. */ | 421 | /* Avoid "false sharing" with last cache line. */ |
362 | unsigned int buffer_size = | 422 | /* ISSUE: This is already done by "dev_alloc_skb()". */ |
423 | unsigned int len = | ||
363 | (((small ? LIPP_SMALL_PACKET_SIZE : large_size) + | 424 | (((small ? LIPP_SMALL_PACKET_SIZE : large_size) + |
364 | CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE()); | 425 | CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE()); |
365 | 426 | ||
366 | /* | 427 | unsigned int padding = 128 - NET_SKB_PAD; |
367 | * ISSUE: Since CPAs are 38 bits, and we can only encode the | 428 | unsigned int align; |
368 | * high 31 bits in a "linux_buffer_t", the low 7 bits must be | ||
369 | * zero, and thus, we must align the actual "va" mod 128. | ||
370 | */ | ||
371 | const unsigned long align = 128; | ||
372 | 429 | ||
373 | struct sk_buff *skb; | 430 | struct sk_buff *skb; |
374 | void *va; | 431 | void *va; |
375 | 432 | ||
376 | struct sk_buff **skb_ptr; | 433 | struct sk_buff **skb_ptr; |
377 | 434 | ||
378 | /* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */ | 435 | /* Request 96 extra bytes for alignment purposes. */ |
379 | /* and also "reserves" that many bytes. */ | 436 | skb = dev_alloc_skb(len + padding); |
380 | /* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */ | 437 | if (skb == NULL) |
381 | int len = sizeof(*skb_ptr) + align + buffer_size; | 438 | return false; |
382 | |||
383 | while (1) { | ||
384 | |||
385 | /* Allocate (or fail). */ | ||
386 | skb = dev_alloc_skb(len); | ||
387 | if (skb == NULL) | ||
388 | return false; | ||
389 | |||
390 | /* Make room for a back-pointer to 'skb'. */ | ||
391 | skb_reserve(skb, sizeof(*skb_ptr)); | ||
392 | 439 | ||
393 | /* Make sure we are aligned. */ | 440 | /* Skip 32 or 96 bytes to align "data" mod 128. */ |
394 | skb_reserve(skb, -(long)skb->data & (align - 1)); | 441 | align = -(long)skb->data & (128 - 1); |
442 | BUG_ON(align > padding); | ||
443 | skb_reserve(skb, align); | ||
395 | 444 | ||
396 | /* This address is given to IPP. */ | 445 | /* This address is given to IPP. */ |
397 | va = skb->data; | 446 | va = skb->data; |
398 | 447 | ||
399 | if (small) | 448 | /* Buffers must not span a huge page. */ |
400 | break; | 449 | BUG_ON(((((long)va & ~HPAGE_MASK) + len) & HPAGE_MASK) != 0); |
401 | 450 | ||
402 | /* ISSUE: This has never been observed! */ | 451 | #ifdef TILE_NET_PARANOIA |
403 | /* Large buffers must not span a huge page. */ | 452 | #if CHIP_HAS_CBOX_HOME_MAP() |
404 | if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0) | 453 | if (hash_default) { |
405 | break; | 454 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va); |
406 | pr_err("Leaking unaligned linux buffer at %p.\n", va); | 455 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) |
456 | panic("Non-HFH ingress buffer! VA=%p Mode=%d PTE=%llx", | ||
457 | va, hv_pte_get_mode(pte), hv_pte_val(pte)); | ||
407 | } | 458 | } |
459 | #endif | ||
460 | #endif | ||
461 | |||
462 | /* Invalidate the packet buffer. */ | ||
463 | if (!hash_default) | ||
464 | __inv_buffer(va, len); | ||
408 | 465 | ||
409 | /* Skip two bytes to satisfy LIPP assumptions. */ | 466 | /* Skip two bytes to satisfy LIPP assumptions. */ |
410 | /* Note that this aligns IP on a 16 byte boundary. */ | 467 | /* Note that this aligns IP on a 16 byte boundary. */ |
@@ -415,23 +472,9 @@ static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, | |||
415 | skb_ptr = va - sizeof(*skb_ptr); | 472 | skb_ptr = va - sizeof(*skb_ptr); |
416 | *skb_ptr = skb; | 473 | *skb_ptr = skb; |
417 | 474 | ||
418 | /* Invalidate the packet buffer. */ | ||
419 | if (!hash_default) | ||
420 | __inv_buffer(skb->data, buffer_size); | ||
421 | |||
422 | /* Make sure "skb_ptr" has been flushed. */ | 475 | /* Make sure "skb_ptr" has been flushed. */ |
423 | __insn_mf(); | 476 | __insn_mf(); |
424 | 477 | ||
425 | #ifdef TILE_NET_PARANOIA | ||
426 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
427 | if (hash_default) { | ||
428 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va); | ||
429 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) | ||
430 | panic("Non-coherent ingress buffer!"); | ||
431 | } | ||
432 | #endif | ||
433 | #endif | ||
434 | |||
435 | /* Provide the new buffer. */ | 478 | /* Provide the new buffer. */ |
436 | tile_net_provide_linux_buffer(info, va, small); | 479 | tile_net_provide_linux_buffer(info, va, small); |
437 | 480 | ||
@@ -469,48 +512,64 @@ oops: | |||
469 | * Grab some LEPP completions, and store them in "comps", of size | 512 | * Grab some LEPP completions, and store them in "comps", of size |
470 | * "comps_size", and return the number of completions which were | 513 | * "comps_size", and return the number of completions which were |
471 | * stored, so the caller can free them. | 514 | * stored, so the caller can free them. |
472 | * | ||
473 | * If "pending" is not NULL, it will be set to true if there might | ||
474 | * still be some pending completions caused by this tile, else false. | ||
475 | */ | 515 | */ |
476 | static unsigned int tile_net_lepp_grab_comps(struct net_device *dev, | 516 | static unsigned int tile_net_lepp_grab_comps(lepp_queue_t *eq, |
477 | struct sk_buff *comps[], | 517 | struct sk_buff *comps[], |
478 | unsigned int comps_size, | 518 | unsigned int comps_size, |
479 | bool *pending) | 519 | unsigned int min_size) |
480 | { | 520 | { |
481 | struct tile_net_priv *priv = netdev_priv(dev); | ||
482 | |||
483 | lepp_queue_t *eq = priv->epp_queue; | ||
484 | |||
485 | unsigned int n = 0; | 521 | unsigned int n = 0; |
486 | 522 | ||
487 | unsigned int comp_head; | 523 | unsigned int comp_head = eq->comp_head; |
488 | unsigned int comp_busy; | 524 | unsigned int comp_busy = eq->comp_busy; |
489 | unsigned int comp_tail; | ||
490 | |||
491 | spin_lock(&priv->comp_lock); | ||
492 | |||
493 | comp_head = eq->comp_head; | ||
494 | comp_busy = eq->comp_busy; | ||
495 | comp_tail = eq->comp_tail; | ||
496 | 525 | ||
497 | while (comp_head != comp_busy && n < comps_size) { | 526 | while (comp_head != comp_busy && n < comps_size) { |
498 | comps[n++] = eq->comps[comp_head]; | 527 | comps[n++] = eq->comps[comp_head]; |
499 | LEPP_QINC(comp_head); | 528 | LEPP_QINC(comp_head); |
500 | } | 529 | } |
501 | 530 | ||
502 | if (pending != NULL) | 531 | if (n < min_size) |
503 | *pending = (comp_head != comp_tail); | 532 | return 0; |
504 | 533 | ||
505 | eq->comp_head = comp_head; | 534 | eq->comp_head = comp_head; |
506 | 535 | ||
507 | spin_unlock(&priv->comp_lock); | ||
508 | |||
509 | return n; | 536 | return n; |
510 | } | 537 | } |
511 | 538 | ||
512 | 539 | ||
513 | /* | 540 | /* |
541 | * Free some comps, and return true iff there are still some pending. | ||
542 | */ | ||
543 | static bool tile_net_lepp_free_comps(struct net_device *dev, bool all) | ||
544 | { | ||
545 | struct tile_net_priv *priv = netdev_priv(dev); | ||
546 | |||
547 | lepp_queue_t *eq = priv->eq; | ||
548 | |||
549 | struct sk_buff *olds[64]; | ||
550 | unsigned int wanted = 64; | ||
551 | unsigned int i, n; | ||
552 | bool pending; | ||
553 | |||
554 | spin_lock(&priv->eq_lock); | ||
555 | |||
556 | if (all) | ||
557 | eq->comp_busy = eq->comp_tail; | ||
558 | |||
559 | n = tile_net_lepp_grab_comps(eq, olds, wanted, 0); | ||
560 | |||
561 | pending = (eq->comp_head != eq->comp_tail); | ||
562 | |||
563 | spin_unlock(&priv->eq_lock); | ||
564 | |||
565 | for (i = 0; i < n; i++) | ||
566 | kfree_skb(olds[i]); | ||
567 | |||
568 | return pending; | ||
569 | } | ||
570 | |||
571 | |||
572 | /* | ||
514 | * Make sure the egress timer is scheduled. | 573 | * Make sure the egress timer is scheduled. |
515 | * | 574 | * |
516 | * Note that we use "schedule if not scheduled" logic instead of the more | 575 | * Note that we use "schedule if not scheduled" logic instead of the more |
@@ -544,21 +603,11 @@ static void tile_net_handle_egress_timer(unsigned long arg) | |||
544 | struct tile_net_cpu *info = (struct tile_net_cpu *)arg; | 603 | struct tile_net_cpu *info = (struct tile_net_cpu *)arg; |
545 | struct net_device *dev = info->napi.dev; | 604 | struct net_device *dev = info->napi.dev; |
546 | 605 | ||
547 | struct sk_buff *olds[32]; | ||
548 | unsigned int wanted = 32; | ||
549 | unsigned int i, nolds = 0; | ||
550 | bool pending; | ||
551 | |||
552 | /* The timer is no longer scheduled. */ | 606 | /* The timer is no longer scheduled. */ |
553 | info->egress_timer_scheduled = false; | 607 | info->egress_timer_scheduled = false; |
554 | 608 | ||
555 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending); | 609 | /* Free comps, and reschedule timer if more are pending. */ |
556 | 610 | if (tile_net_lepp_free_comps(dev, false)) | |
557 | for (i = 0; i < nolds; i++) | ||
558 | kfree_skb(olds[i]); | ||
559 | |||
560 | /* Reschedule timer if needed. */ | ||
561 | if (pending) | ||
562 | tile_net_schedule_egress_timer(info); | 611 | tile_net_schedule_egress_timer(info); |
563 | } | 612 | } |
564 | 613 | ||
@@ -636,8 +685,39 @@ static bool is_dup_ack(char *s1, char *s2, unsigned int len) | |||
636 | 685 | ||
637 | 686 | ||
638 | 687 | ||
688 | static void tile_net_discard_aux(struct tile_net_cpu *info, int index) | ||
689 | { | ||
690 | struct tile_netio_queue *queue = &info->queue; | ||
691 | netio_queue_impl_t *qsp = queue->__system_part; | ||
692 | netio_queue_user_impl_t *qup = &queue->__user_part; | ||
693 | |||
694 | int index2_aux = index + sizeof(netio_pkt_t); | ||
695 | int index2 = | ||
696 | ((index2_aux == | ||
697 | qsp->__packet_receive_queue.__last_packet_plus_one) ? | ||
698 | 0 : index2_aux); | ||
699 | |||
700 | netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index); | ||
701 | |||
702 | /* Extract the "linux_buffer_t". */ | ||
703 | unsigned int buffer = pkt->__packet.word; | ||
704 | |||
705 | /* Convert "linux_buffer_t" to "va". */ | ||
706 | void *va = __va((phys_addr_t)(buffer >> 1) << 7); | ||
707 | |||
708 | /* Acquire the associated "skb". */ | ||
709 | struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); | ||
710 | struct sk_buff *skb = *skb_ptr; | ||
711 | |||
712 | kfree_skb(skb); | ||
713 | |||
714 | /* Consume this packet. */ | ||
715 | qup->__packet_receive_read = index2; | ||
716 | } | ||
717 | |||
718 | |||
639 | /* | 719 | /* |
640 | * Like "tile_net_handle_packets()", but just discard packets. | 720 | * Like "tile_net_poll()", but just discard packets. |
641 | */ | 721 | */ |
642 | static void tile_net_discard_packets(struct net_device *dev) | 722 | static void tile_net_discard_packets(struct net_device *dev) |
643 | { | 723 | { |
@@ -650,32 +730,8 @@ static void tile_net_discard_packets(struct net_device *dev) | |||
650 | 730 | ||
651 | while (qup->__packet_receive_read != | 731 | while (qup->__packet_receive_read != |
652 | qsp->__packet_receive_queue.__packet_write) { | 732 | qsp->__packet_receive_queue.__packet_write) { |
653 | |||
654 | int index = qup->__packet_receive_read; | 733 | int index = qup->__packet_receive_read; |
655 | 734 | tile_net_discard_aux(info, index); | |
656 | int index2_aux = index + sizeof(netio_pkt_t); | ||
657 | int index2 = | ||
658 | ((index2_aux == | ||
659 | qsp->__packet_receive_queue.__last_packet_plus_one) ? | ||
660 | 0 : index2_aux); | ||
661 | |||
662 | netio_pkt_t *pkt = (netio_pkt_t *) | ||
663 | ((unsigned long) &qsp[1] + index); | ||
664 | |||
665 | /* Extract the "linux_buffer_t". */ | ||
666 | unsigned int buffer = pkt->__packet.word; | ||
667 | |||
668 | /* Convert "linux_buffer_t" to "va". */ | ||
669 | void *va = __va((phys_addr_t)(buffer >> 1) << 7); | ||
670 | |||
671 | /* Acquire the associated "skb". */ | ||
672 | struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); | ||
673 | struct sk_buff *skb = *skb_ptr; | ||
674 | |||
675 | kfree_skb(skb); | ||
676 | |||
677 | /* Consume this packet. */ | ||
678 | qup->__packet_receive_read = index2; | ||
679 | } | 735 | } |
680 | } | 736 | } |
681 | 737 | ||
@@ -704,7 +760,8 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
704 | 760 | ||
705 | netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt); | 761 | netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt); |
706 | 762 | ||
707 | /* Extract the packet size. */ | 763 | /* Extract the packet size. FIXME: Shouldn't the second line */ |
764 | /* get subtracted? Mostly moot, since it should be "zero". */ | ||
708 | unsigned long len = | 765 | unsigned long len = |
709 | (NETIO_PKT_CUSTOM_LENGTH(pkt) + | 766 | (NETIO_PKT_CUSTOM_LENGTH(pkt) + |
710 | NET_IP_ALIGN - NETIO_PACKET_PADDING); | 767 | NET_IP_ALIGN - NETIO_PACKET_PADDING); |
@@ -722,15 +779,6 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
722 | /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ | 779 | /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ |
723 | unsigned char *buf = va + NET_IP_ALIGN; | 780 | unsigned char *buf = va + NET_IP_ALIGN; |
724 | 781 | ||
725 | #ifdef IGNORE_DUP_ACKS | ||
726 | |||
727 | static int other; | ||
728 | static int final; | ||
729 | static int keep; | ||
730 | static int skip; | ||
731 | |||
732 | #endif | ||
733 | |||
734 | /* Invalidate the packet buffer. */ | 782 | /* Invalidate the packet buffer. */ |
735 | if (!hash_default) | 783 | if (!hash_default) |
736 | __inv_buffer(buf, len); | 784 | __inv_buffer(buf, len); |
@@ -745,16 +793,8 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
745 | #ifdef TILE_NET_VERIFY_INGRESS | 793 | #ifdef TILE_NET_VERIFY_INGRESS |
746 | if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) && | 794 | if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) && |
747 | NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) { | 795 | NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) { |
748 | /* | 796 | /* Bug 6624: Includes UDP packets with a "zero" checksum. */ |
749 | * FIXME: This complains about UDP packets | ||
750 | * with a "zero" checksum (bug 6624). | ||
751 | */ | ||
752 | #ifdef TILE_NET_PANIC_ON_BAD | ||
753 | dump_packet(buf, len, "rx"); | ||
754 | panic("Bad L4 checksum."); | ||
755 | #else | ||
756 | pr_warning("Bad L4 checksum on %d byte packet.\n", len); | 797 | pr_warning("Bad L4 checksum on %d byte packet.\n", len); |
757 | #endif | ||
758 | } | 798 | } |
759 | if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) && | 799 | if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) && |
760 | NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) { | 800 | NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) { |
@@ -769,90 +809,29 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
769 | } | 809 | } |
770 | break; | 810 | break; |
771 | case NETIO_PKT_STATUS_BAD: | 811 | case NETIO_PKT_STATUS_BAD: |
772 | #ifdef TILE_NET_PANIC_ON_BAD | 812 | pr_warning("Unexpected BAD %ld byte packet.\n", len); |
773 | dump_packet(buf, len, "rx"); | ||
774 | panic("Unexpected BAD packet."); | ||
775 | #else | ||
776 | pr_warning("Unexpected BAD %d byte packet.\n", len); | ||
777 | #endif | ||
778 | } | 813 | } |
779 | #endif | 814 | #endif |
780 | 815 | ||
781 | filter = 0; | 816 | filter = 0; |
782 | 817 | ||
818 | /* ISSUE: Filter TCP packets with "bad" checksums? */ | ||
819 | |||
783 | if (!(dev->flags & IFF_UP)) { | 820 | if (!(dev->flags & IFF_UP)) { |
784 | /* Filter packets received before we're up. */ | 821 | /* Filter packets received before we're up. */ |
785 | filter = 1; | 822 | filter = 1; |
823 | } else if (NETIO_PKT_STATUS_M(metadata, pkt) == NETIO_PKT_STATUS_BAD) { | ||
824 | /* Filter "truncated" packets. */ | ||
825 | filter = 1; | ||
786 | } else if (!(dev->flags & IFF_PROMISC)) { | 826 | } else if (!(dev->flags & IFF_PROMISC)) { |
787 | /* | 827 | /* FIXME: Implement HW multicast filter. */ |
788 | * FIXME: Implement HW multicast filter. | 828 | if (!is_multicast_ether_addr(buf)) { |
789 | */ | ||
790 | if (is_unicast_ether_addr(buf)) { | ||
791 | /* Filter packets not for our address. */ | 829 | /* Filter packets not for our address. */ |
792 | const u8 *mine = dev->dev_addr; | 830 | const u8 *mine = dev->dev_addr; |
793 | filter = compare_ether_addr(mine, buf); | 831 | filter = compare_ether_addr(mine, buf); |
794 | } | 832 | } |
795 | } | 833 | } |
796 | 834 | ||
797 | #ifdef IGNORE_DUP_ACKS | ||
798 | |||
799 | if (len != 66) { | ||
800 | /* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */ | ||
801 | |||
802 | other++; | ||
803 | |||
804 | } else if (index2 == | ||
805 | qsp->__packet_receive_queue.__packet_write) { | ||
806 | |||
807 | final++; | ||
808 | |||
809 | } else { | ||
810 | |||
811 | netio_pkt_t *pkt2 = (netio_pkt_t *) | ||
812 | ((unsigned long) &qsp[1] + index2); | ||
813 | |||
814 | netio_pkt_metadata_t *metadata2 = | ||
815 | NETIO_PKT_METADATA(pkt2); | ||
816 | |||
817 | /* Extract the packet size. */ | ||
818 | unsigned long len2 = | ||
819 | (NETIO_PKT_CUSTOM_LENGTH(pkt2) + | ||
820 | NET_IP_ALIGN - NETIO_PACKET_PADDING); | ||
821 | |||
822 | if (len2 == 66 && | ||
823 | NETIO_PKT_FLOW_HASH_M(metadata, pkt) == | ||
824 | NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) { | ||
825 | |||
826 | /* Extract the "linux_buffer_t". */ | ||
827 | unsigned int buffer2 = pkt2->__packet.word; | ||
828 | |||
829 | /* Convert "linux_buffer_t" to "va". */ | ||
830 | void *va2 = | ||
831 | __va((phys_addr_t)(buffer2 >> 1) << 7); | ||
832 | |||
833 | /* Extract the packet data pointer. */ | ||
834 | /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ | ||
835 | unsigned char *buf2 = va2 + NET_IP_ALIGN; | ||
836 | |||
837 | /* Invalidate the packet buffer. */ | ||
838 | if (!hash_default) | ||
839 | __inv_buffer(buf2, len2); | ||
840 | |||
841 | if (is_dup_ack(buf, buf2, len)) { | ||
842 | skip++; | ||
843 | filter = 1; | ||
844 | } else { | ||
845 | keep++; | ||
846 | } | ||
847 | } | ||
848 | } | ||
849 | |||
850 | if (net_ratelimit()) | ||
851 | pr_info("Other %d Final %d Keep %d Skip %d.\n", | ||
852 | other, final, keep, skip); | ||
853 | |||
854 | #endif | ||
855 | |||
856 | if (filter) { | 835 | if (filter) { |
857 | 836 | ||
858 | /* ISSUE: Update "drop" statistics? */ | 837 | /* ISSUE: Update "drop" statistics? */ |
@@ -877,10 +856,7 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
877 | /* NOTE: This call also sets "skb->dev = dev". */ | 856 | /* NOTE: This call also sets "skb->dev = dev". */ |
878 | skb->protocol = eth_type_trans(skb, dev); | 857 | skb->protocol = eth_type_trans(skb, dev); |
879 | 858 | ||
880 | /* ISSUE: Discard corrupt packets? */ | 859 | /* Avoid recomputing "good" TCP/UDP checksums. */ |
881 | /* ISSUE: Discard packets with bad checksums? */ | ||
882 | |||
883 | /* Avoid recomputing TCP/UDP checksums. */ | ||
884 | if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) | 860 | if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) |
885 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 861 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
886 | 862 | ||
@@ -912,9 +888,14 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
912 | /* | 888 | /* |
913 | * Handle some packets for the given device on the current CPU. | 889 | * Handle some packets for the given device on the current CPU. |
914 | * | 890 | * |
915 | * ISSUE: The "rotting packet" race condition occurs if a packet | 891 | * If "tile_net_stop()" is called on some other tile while this |
916 | * arrives after the queue appears to be empty, and before the | 892 | * function is running, we will return, hopefully before that |
917 | * hypervisor interrupt is re-enabled. | 893 | * other tile asks us to call "napi_disable()". |
894 | * | ||
895 | * The "rotting packet" race condition occurs if a packet arrives | ||
896 | * during the extremely narrow window between the queue appearing to | ||
897 | * be empty, and the ingress interrupt being re-enabled. This happens | ||
898 | * a LOT under heavy network load. | ||
918 | */ | 899 | */ |
919 | static int tile_net_poll(struct napi_struct *napi, int budget) | 900 | static int tile_net_poll(struct napi_struct *napi, int budget) |
920 | { | 901 | { |
@@ -928,7 +909,7 @@ static int tile_net_poll(struct napi_struct *napi, int budget) | |||
928 | 909 | ||
929 | unsigned int work = 0; | 910 | unsigned int work = 0; |
930 | 911 | ||
931 | while (1) { | 912 | while (priv->active) { |
932 | int index = qup->__packet_receive_read; | 913 | int index = qup->__packet_receive_read; |
933 | if (index == qsp->__packet_receive_queue.__packet_write) | 914 | if (index == qsp->__packet_receive_queue.__packet_write) |
934 | break; | 915 | break; |
@@ -941,19 +922,24 @@ static int tile_net_poll(struct napi_struct *napi, int budget) | |||
941 | 922 | ||
942 | napi_complete(&info->napi); | 923 | napi_complete(&info->napi); |
943 | 924 | ||
944 | /* Re-enable hypervisor interrupts. */ | 925 | if (!priv->active) |
926 | goto done; | ||
927 | |||
928 | /* Re-enable the ingress interrupt. */ | ||
945 | enable_percpu_irq(priv->intr_id); | 929 | enable_percpu_irq(priv->intr_id); |
946 | 930 | ||
947 | /* HACK: Avoid the "rotting packet" problem. */ | 931 | /* HACK: Avoid the "rotting packet" problem (see above). */ |
948 | if (qup->__packet_receive_read != | 932 | if (qup->__packet_receive_read != |
949 | qsp->__packet_receive_queue.__packet_write) | 933 | qsp->__packet_receive_queue.__packet_write) { |
950 | napi_schedule(&info->napi); | 934 | /* ISSUE: Sometimes this returns zero, presumably */ |
951 | 935 | /* because an interrupt was handled for this tile. */ | |
952 | /* ISSUE: Handle completions? */ | 936 | (void)napi_reschedule(&info->napi); |
937 | } | ||
953 | 938 | ||
954 | done: | 939 | done: |
955 | 940 | ||
956 | tile_net_provide_needed_buffers(info); | 941 | if (priv->active) |
942 | tile_net_provide_needed_buffers(info); | ||
957 | 943 | ||
958 | return work; | 944 | return work; |
959 | } | 945 | } |
@@ -961,6 +947,12 @@ done: | |||
961 | 947 | ||
962 | /* | 948 | /* |
963 | * Handle an ingress interrupt for the given device on the current cpu. | 949 | * Handle an ingress interrupt for the given device on the current cpu. |
950 | * | ||
951 | * ISSUE: Sometimes this gets called after "disable_percpu_irq()" has | ||
952 | * been called! This is probably due to "pending hypervisor downcalls". | ||
953 | * | ||
954 | * ISSUE: Is there any race condition between the "napi_schedule()" here | ||
955 | * and the "napi_complete()" call above? | ||
964 | */ | 956 | */ |
965 | static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) | 957 | static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) |
966 | { | 958 | { |
@@ -969,9 +961,15 @@ static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) | |||
969 | int my_cpu = smp_processor_id(); | 961 | int my_cpu = smp_processor_id(); |
970 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | 962 | struct tile_net_cpu *info = priv->cpu[my_cpu]; |
971 | 963 | ||
972 | /* Disable hypervisor interrupt. */ | 964 | /* Disable the ingress interrupt. */ |
973 | disable_percpu_irq(priv->intr_id); | 965 | disable_percpu_irq(priv->intr_id); |
974 | 966 | ||
967 | /* Ignore unwanted interrupts. */ | ||
968 | if (!priv->active) | ||
969 | return IRQ_HANDLED; | ||
970 | |||
971 | /* ISSUE: Sometimes "info->napi_enabled" is false here. */ | ||
972 | |||
975 | napi_schedule(&info->napi); | 973 | napi_schedule(&info->napi); |
976 | 974 | ||
977 | return IRQ_HANDLED; | 975 | return IRQ_HANDLED; |
@@ -1005,8 +1003,7 @@ static int tile_net_open_aux(struct net_device *dev) | |||
1005 | */ | 1003 | */ |
1006 | { | 1004 | { |
1007 | int epp_home = hv_lotar_to_cpu(epp_lotar); | 1005 | int epp_home = hv_lotar_to_cpu(epp_lotar); |
1008 | struct page *page = virt_to_page(priv->epp_queue); | 1006 | homecache_change_page_home(priv->eq_pages, EQ_ORDER, epp_home); |
1009 | homecache_change_page_home(page, 0, epp_home); | ||
1010 | } | 1007 | } |
1011 | 1008 | ||
1012 | /* | 1009 | /* |
@@ -1015,9 +1012,9 @@ static int tile_net_open_aux(struct net_device *dev) | |||
1015 | { | 1012 | { |
1016 | netio_ipp_address_t ea = { | 1013 | netio_ipp_address_t ea = { |
1017 | .va = 0, | 1014 | .va = 0, |
1018 | .pa = __pa(priv->epp_queue), | 1015 | .pa = __pa(priv->eq), |
1019 | .pte = hv_pte(0), | 1016 | .pte = hv_pte(0), |
1020 | .size = PAGE_SIZE, | 1017 | .size = EQ_SIZE, |
1021 | }; | 1018 | }; |
1022 | ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); | 1019 | ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); |
1023 | ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); | 1020 | ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); |
@@ -1043,7 +1040,7 @@ static int tile_net_open_aux(struct net_device *dev) | |||
1043 | 1040 | ||
1044 | 1041 | ||
1045 | /* | 1042 | /* |
1046 | * Register with hypervisor on each CPU. | 1043 | * Register with hypervisor on the current CPU. |
1047 | * | 1044 | * |
1048 | * Strangely, this function does important things even if it "fails", | 1045 | * Strangely, this function does important things even if it "fails", |
1049 | * which is especially common if the link is not up yet. Hopefully | 1046 | * which is especially common if the link is not up yet. Hopefully |
@@ -1092,7 +1089,8 @@ static void tile_net_register(void *dev_ptr) | |||
1092 | priv->cpu[my_cpu] = info; | 1089 | priv->cpu[my_cpu] = info; |
1093 | 1090 | ||
1094 | /* | 1091 | /* |
1095 | * Register ourselves with the IPP. | 1092 | * Register ourselves with LIPP. This does a lot of stuff, |
1093 | * including invoking the LIPP registration code. | ||
1096 | */ | 1094 | */ |
1097 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, | 1095 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, |
1098 | (HV_VirtAddr)&config, | 1096 | (HV_VirtAddr)&config, |
@@ -1101,8 +1099,11 @@ static void tile_net_register(void *dev_ptr) | |||
1101 | PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", | 1099 | PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", |
1102 | ret); | 1100 | ret); |
1103 | if (ret < 0) { | 1101 | if (ret < 0) { |
1104 | printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF" | 1102 | if (ret != NETIO_LINK_DOWN) { |
1105 | " failure %d\n", ret); | 1103 | printk(KERN_DEBUG "hv_dev_pwrite " |
1104 | "NETIO_IPP_INPUT_REGISTER_OFF failure %d\n", | ||
1105 | ret); | ||
1106 | } | ||
1106 | info->link_down = (ret == NETIO_LINK_DOWN); | 1107 | info->link_down = (ret == NETIO_LINK_DOWN); |
1107 | return; | 1108 | return; |
1108 | } | 1109 | } |
@@ -1145,15 +1146,47 @@ static void tile_net_register(void *dev_ptr) | |||
1145 | NETIO_IPP_GET_FASTIO_OFF); | 1146 | NETIO_IPP_GET_FASTIO_OFF); |
1146 | PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); | 1147 | PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); |
1147 | 1148 | ||
1148 | netif_napi_add(dev, &info->napi, tile_net_poll, 64); | ||
1149 | |||
1150 | /* Now we are registered. */ | 1149 | /* Now we are registered. */ |
1151 | info->registered = true; | 1150 | info->registered = true; |
1152 | } | 1151 | } |
1153 | 1152 | ||
1154 | 1153 | ||
1155 | /* | 1154 | /* |
1156 | * Unregister with hypervisor on each CPU. | 1155 | * Deregister with hypervisor on the current CPU. |
1156 | * | ||
1157 | * This simply discards all our credits, so no more packets will be | ||
1158 | * delivered to this tile. There may still be packets in our queue. | ||
1159 | * | ||
1160 | * Also, disable the ingress interrupt. | ||
1161 | */ | ||
1162 | static void tile_net_deregister(void *dev_ptr) | ||
1163 | { | ||
1164 | struct net_device *dev = (struct net_device *)dev_ptr; | ||
1165 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1166 | int my_cpu = smp_processor_id(); | ||
1167 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
1168 | |||
1169 | /* Disable the ingress interrupt. */ | ||
1170 | disable_percpu_irq(priv->intr_id); | ||
1171 | |||
1172 | /* Do nothing else if not registered. */ | ||
1173 | if (info == NULL || !info->registered) | ||
1174 | return; | ||
1175 | |||
1176 | { | ||
1177 | struct tile_netio_queue *queue = &info->queue; | ||
1178 | netio_queue_user_impl_t *qup = &queue->__user_part; | ||
1179 | |||
1180 | /* Discard all our credits. */ | ||
1181 | __netio_fastio_return_credits(qup->__fastio_index, -1); | ||
1182 | } | ||
1183 | } | ||
1184 | |||
1185 | |||
1186 | /* | ||
1187 | * Unregister with hypervisor on the current CPU. | ||
1188 | * | ||
1189 | * Also, disable the ingress interrupt. | ||
1157 | */ | 1190 | */ |
1158 | static void tile_net_unregister(void *dev_ptr) | 1191 | static void tile_net_unregister(void *dev_ptr) |
1159 | { | 1192 | { |
@@ -1162,35 +1195,23 @@ static void tile_net_unregister(void *dev_ptr) | |||
1162 | int my_cpu = smp_processor_id(); | 1195 | int my_cpu = smp_processor_id(); |
1163 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | 1196 | struct tile_net_cpu *info = priv->cpu[my_cpu]; |
1164 | 1197 | ||
1165 | int ret = 0; | 1198 | int ret; |
1166 | int dummy = 0; | 1199 | int dummy = 0; |
1167 | 1200 | ||
1168 | /* Do nothing if never registered. */ | 1201 | /* Disable the ingress interrupt. */ |
1169 | if (info == NULL) | 1202 | disable_percpu_irq(priv->intr_id); |
1170 | return; | ||
1171 | 1203 | ||
1172 | /* Do nothing if already unregistered. */ | 1204 | /* Do nothing else if not registered. */ |
1173 | if (!info->registered) | 1205 | if (info == NULL || !info->registered) |
1174 | return; | 1206 | return; |
1175 | 1207 | ||
1176 | /* | 1208 | /* Unregister ourselves with LIPP/LEPP. */ |
1177 | * Unregister ourselves with LIPP. | ||
1178 | */ | ||
1179 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | 1209 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, |
1180 | sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); | 1210 | sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); |
1181 | PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n", | 1211 | if (ret < 0) |
1182 | ret); | 1212 | panic("Failed to unregister with LIPP/LEPP!\n"); |
1183 | if (ret < 0) { | ||
1184 | /* FIXME: Just panic? */ | ||
1185 | pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF" | ||
1186 | " failure %d\n", ret); | ||
1187 | } | ||
1188 | 1213 | ||
1189 | /* | 1214 | /* Discard all packets still in our NetIO queue. */ |
1190 | * Discard all packets still in our NetIO queue. Hopefully, | ||
1191 | * once the unregister call is complete, there will be no | ||
1192 | * packets still in flight on the IDN. | ||
1193 | */ | ||
1194 | tile_net_discard_packets(dev); | 1215 | tile_net_discard_packets(dev); |
1195 | 1216 | ||
1196 | /* Reset state. */ | 1217 | /* Reset state. */ |
@@ -1200,11 +1221,6 @@ static void tile_net_unregister(void *dev_ptr) | |||
1200 | /* Cancel egress timer. */ | 1221 | /* Cancel egress timer. */ |
1201 | del_timer(&info->egress_timer); | 1222 | del_timer(&info->egress_timer); |
1202 | info->egress_timer_scheduled = false; | 1223 | info->egress_timer_scheduled = false; |
1203 | |||
1204 | netif_napi_del(&info->napi); | ||
1205 | |||
1206 | /* Now we are unregistered. */ | ||
1207 | info->registered = false; | ||
1208 | } | 1224 | } |
1209 | 1225 | ||
1210 | 1226 | ||
@@ -1212,18 +1228,28 @@ static void tile_net_unregister(void *dev_ptr) | |||
1212 | * Helper function for "tile_net_stop()". | 1228 | * Helper function for "tile_net_stop()". |
1213 | * | 1229 | * |
1214 | * Also used to handle registration failure in "tile_net_open_inner()", | 1230 | * Also used to handle registration failure in "tile_net_open_inner()", |
1215 | * when "fully_opened" is known to be false, and the various extra | 1231 | * when the various extra steps in "tile_net_stop()" are not necessary. |
1216 | * steps in "tile_net_stop()" are not necessary. ISSUE: It might be | ||
1217 | * simpler if we could just call "tile_net_stop()" anyway. | ||
1218 | */ | 1232 | */ |
1219 | static void tile_net_stop_aux(struct net_device *dev) | 1233 | static void tile_net_stop_aux(struct net_device *dev) |
1220 | { | 1234 | { |
1221 | struct tile_net_priv *priv = netdev_priv(dev); | 1235 | struct tile_net_priv *priv = netdev_priv(dev); |
1236 | int i; | ||
1222 | 1237 | ||
1223 | int dummy = 0; | 1238 | int dummy = 0; |
1224 | 1239 | ||
1225 | /* Unregister all tiles, so LIPP will stop delivering packets. */ | 1240 | /* |
1241 | * Unregister all tiles, so LIPP will stop delivering packets. | ||
1242 | * Also, delete all the "napi" objects (sequentially, to protect | ||
1243 | * "dev->napi_list"). | ||
1244 | */ | ||
1226 | on_each_cpu(tile_net_unregister, (void *)dev, 1); | 1245 | on_each_cpu(tile_net_unregister, (void *)dev, 1); |
1246 | for_each_online_cpu(i) { | ||
1247 | struct tile_net_cpu *info = priv->cpu[i]; | ||
1248 | if (info != NULL && info->registered) { | ||
1249 | netif_napi_del(&info->napi); | ||
1250 | info->registered = false; | ||
1251 | } | ||
1252 | } | ||
1227 | 1253 | ||
1228 | /* Stop LIPP/LEPP. */ | 1254 | /* Stop LIPP/LEPP. */ |
1229 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | 1255 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, |
@@ -1235,18 +1261,15 @@ static void tile_net_stop_aux(struct net_device *dev) | |||
1235 | 1261 | ||
1236 | 1262 | ||
1237 | /* | 1263 | /* |
1238 | * Disable ingress interrupts for the given device on the current cpu. | 1264 | * Disable NAPI for the given device on the current cpu. |
1239 | */ | 1265 | */ |
1240 | static void tile_net_disable_intr(void *dev_ptr) | 1266 | static void tile_net_stop_disable(void *dev_ptr) |
1241 | { | 1267 | { |
1242 | struct net_device *dev = (struct net_device *)dev_ptr; | 1268 | struct net_device *dev = (struct net_device *)dev_ptr; |
1243 | struct tile_net_priv *priv = netdev_priv(dev); | 1269 | struct tile_net_priv *priv = netdev_priv(dev); |
1244 | int my_cpu = smp_processor_id(); | 1270 | int my_cpu = smp_processor_id(); |
1245 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | 1271 | struct tile_net_cpu *info = priv->cpu[my_cpu]; |
1246 | 1272 | ||
1247 | /* Disable hypervisor interrupt. */ | ||
1248 | disable_percpu_irq(priv->intr_id); | ||
1249 | |||
1250 | /* Disable NAPI if needed. */ | 1273 | /* Disable NAPI if needed. */ |
1251 | if (info != NULL && info->napi_enabled) { | 1274 | if (info != NULL && info->napi_enabled) { |
1252 | napi_disable(&info->napi); | 1275 | napi_disable(&info->napi); |
@@ -1256,21 +1279,24 @@ static void tile_net_disable_intr(void *dev_ptr) | |||
1256 | 1279 | ||
1257 | 1280 | ||
1258 | /* | 1281 | /* |
1259 | * Enable ingress interrupts for the given device on the current cpu. | 1282 | * Enable NAPI and the ingress interrupt for the given device |
1283 | * on the current cpu. | ||
1284 | * | ||
1285 | * ISSUE: Only do this for "network cpus"? | ||
1260 | */ | 1286 | */ |
1261 | static void tile_net_enable_intr(void *dev_ptr) | 1287 | static void tile_net_open_enable(void *dev_ptr) |
1262 | { | 1288 | { |
1263 | struct net_device *dev = (struct net_device *)dev_ptr; | 1289 | struct net_device *dev = (struct net_device *)dev_ptr; |
1264 | struct tile_net_priv *priv = netdev_priv(dev); | 1290 | struct tile_net_priv *priv = netdev_priv(dev); |
1265 | int my_cpu = smp_processor_id(); | 1291 | int my_cpu = smp_processor_id(); |
1266 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | 1292 | struct tile_net_cpu *info = priv->cpu[my_cpu]; |
1267 | 1293 | ||
1268 | /* Enable hypervisor interrupt. */ | ||
1269 | enable_percpu_irq(priv->intr_id); | ||
1270 | |||
1271 | /* Enable NAPI. */ | 1294 | /* Enable NAPI. */ |
1272 | napi_enable(&info->napi); | 1295 | napi_enable(&info->napi); |
1273 | info->napi_enabled = true; | 1296 | info->napi_enabled = true; |
1297 | |||
1298 | /* Enable the ingress interrupt. */ | ||
1299 | enable_percpu_irq(priv->intr_id); | ||
1274 | } | 1300 | } |
1275 | 1301 | ||
1276 | 1302 | ||
@@ -1288,8 +1314,9 @@ static int tile_net_open_inner(struct net_device *dev) | |||
1288 | int my_cpu = smp_processor_id(); | 1314 | int my_cpu = smp_processor_id(); |
1289 | struct tile_net_cpu *info; | 1315 | struct tile_net_cpu *info; |
1290 | struct tile_netio_queue *queue; | 1316 | struct tile_netio_queue *queue; |
1291 | unsigned int irq; | 1317 | int result = 0; |
1292 | int i; | 1318 | int i; |
1319 | int dummy = 0; | ||
1293 | 1320 | ||
1294 | /* | 1321 | /* |
1295 | * First try to register just on the local CPU, and handle any | 1322 | * First try to register just on the local CPU, and handle any |
@@ -1307,42 +1334,52 @@ static int tile_net_open_inner(struct net_device *dev) | |||
1307 | /* | 1334 | /* |
1308 | * Now register everywhere else. If any registration fails, | 1335 | * Now register everywhere else. If any registration fails, |
1309 | * even for "link down" (which might not be possible), we | 1336 | * even for "link down" (which might not be possible), we |
1310 | * clean up using "tile_net_stop_aux()". | 1337 | * clean up using "tile_net_stop_aux()". Also, add all the |
1338 | * "napi" objects (sequentially, to protect "dev->napi_list"). | ||
1339 | * ISSUE: Only use "netif_napi_add()" for "network cpus"? | ||
1311 | */ | 1340 | */ |
1312 | smp_call_function(tile_net_register, (void *)dev, 1); | 1341 | smp_call_function(tile_net_register, (void *)dev, 1); |
1313 | for_each_online_cpu(i) { | 1342 | for_each_online_cpu(i) { |
1314 | if (!priv->cpu[i]->registered) { | 1343 | struct tile_net_cpu *info = priv->cpu[i]; |
1315 | tile_net_stop_aux(dev); | 1344 | if (info->registered) |
1316 | return -EAGAIN; | 1345 | netif_napi_add(dev, &info->napi, tile_net_poll, 64); |
1317 | } | 1346 | else |
1347 | result = -EAGAIN; | ||
1348 | } | ||
1349 | if (result != 0) { | ||
1350 | tile_net_stop_aux(dev); | ||
1351 | return result; | ||
1318 | } | 1352 | } |
1319 | 1353 | ||
1320 | queue = &info->queue; | 1354 | queue = &info->queue; |
1321 | 1355 | ||
1322 | /* | 1356 | if (priv->intr_id == 0) { |
1323 | * Set the device intr bit mask. | 1357 | unsigned int irq; |
1324 | * The tile_net_register above sets per tile __intr_id. | ||
1325 | */ | ||
1326 | priv->intr_id = queue->__system_part->__intr_id; | ||
1327 | BUG_ON(!priv->intr_id); | ||
1328 | |||
1329 | /* | ||
1330 | * Register the device interrupt handler. | ||
1331 | * The __ffs() function returns the index into the interrupt handler | ||
1332 | * table from the interrupt bit mask which should have one bit | ||
1333 | * and one bit only set. | ||
1334 | */ | ||
1335 | irq = __ffs(priv->intr_id); | ||
1336 | tile_irq_activate(irq, TILE_IRQ_PERCPU); | ||
1337 | BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt, | ||
1338 | 0, dev->name, (void *)dev) != 0); | ||
1339 | 1358 | ||
1340 | /* ISSUE: How could "priv->fully_opened" ever be "true" here? */ | 1359 | /* |
1341 | 1360 | * Acquire the irq allocated by the hypervisor. Every | |
1342 | if (!priv->fully_opened) { | 1361 | * queue gets the same irq. The "__intr_id" field is |
1362 | * "1 << irq", so we use "__ffs()" to extract "irq". | ||
1363 | */ | ||
1364 | priv->intr_id = queue->__system_part->__intr_id; | ||
1365 | BUG_ON(priv->intr_id == 0); | ||
1366 | irq = __ffs(priv->intr_id); | ||
1343 | 1367 | ||
1344 | int dummy = 0; | 1368 | /* |
1369 | * Register the ingress interrupt handler for this | ||
1370 | * device, permanently. | ||
1371 | * | ||
1372 | * We used to call "free_irq()" in "tile_net_stop()", | ||
1373 | * and then re-register the handler here every time, | ||
1374 | * but that caused DNP errors in "handle_IRQ_event()" | ||
1375 | * because "desc->action" was NULL. See bug 9143. | ||
1376 | */ | ||
1377 | tile_irq_activate(irq, TILE_IRQ_PERCPU); | ||
1378 | BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt, | ||
1379 | 0, dev->name, (void *)dev) != 0); | ||
1380 | } | ||
1345 | 1381 | ||
1382 | { | ||
1346 | /* Allocate initial buffers. */ | 1383 | /* Allocate initial buffers. */ |
1347 | 1384 | ||
1348 | int max_buffers = | 1385 | int max_buffers = |
@@ -1359,18 +1396,21 @@ static int tile_net_open_inner(struct net_device *dev) | |||
1359 | if (info->num_needed_small_buffers != 0 || | 1396 | if (info->num_needed_small_buffers != 0 || |
1360 | info->num_needed_large_buffers != 0) | 1397 | info->num_needed_large_buffers != 0) |
1361 | panic("Insufficient memory for buffer stack!"); | 1398 | panic("Insufficient memory for buffer stack!"); |
1399 | } | ||
1362 | 1400 | ||
1363 | /* Start LIPP/LEPP and activate "ingress" at the shim. */ | 1401 | /* We are about to be active. */ |
1364 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | 1402 | priv->active = true; |
1365 | sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0) | ||
1366 | panic("Failed to activate the LIPP Shim!\n"); | ||
1367 | 1403 | ||
1368 | priv->fully_opened = 1; | 1404 | /* Make sure "active" is visible to all tiles. */ |
1369 | } | 1405 | mb(); |
1370 | 1406 | ||
1371 | /* On each tile, enable the hypervisor to trigger interrupts. */ | 1407 | /* On each tile, enable NAPI and the ingress interrupt. */ |
1372 | /* ISSUE: Do this before starting LIPP/LEPP? */ | 1408 | on_each_cpu(tile_net_open_enable, (void *)dev, 1); |
1373 | on_each_cpu(tile_net_enable_intr, (void *)dev, 1); | 1409 | |
1410 | /* Start LIPP/LEPP and activate "ingress" at the shim. */ | ||
1411 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | ||
1412 | sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0) | ||
1413 | panic("Failed to activate the LIPP Shim!\n"); | ||
1374 | 1414 | ||
1375 | /* Start our transmit queue. */ | 1415 | /* Start our transmit queue. */ |
1376 | netif_start_queue(dev); | 1416 | netif_start_queue(dev); |
@@ -1396,9 +1436,9 @@ static void tile_net_open_retry(struct work_struct *w) | |||
1396 | * ourselves to try again later; otherwise, tell Linux we now have | 1436 | * ourselves to try again later; otherwise, tell Linux we now have |
1397 | * a working link. ISSUE: What if the return value is negative? | 1437 | * a working link. ISSUE: What if the return value is negative? |
1398 | */ | 1438 | */ |
1399 | if (tile_net_open_inner(priv->dev)) | 1439 | if (tile_net_open_inner(priv->dev) != 0) |
1400 | schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, | 1440 | schedule_delayed_work(&priv->retry_work, |
1401 | TILE_NET_RETRY_INTERVAL); | 1441 | TILE_NET_RETRY_INTERVAL); |
1402 | else | 1442 | else |
1403 | netif_carrier_on(priv->dev); | 1443 | netif_carrier_on(priv->dev); |
1404 | } | 1444 | } |
@@ -1412,8 +1452,8 @@ static void tile_net_open_retry(struct work_struct *w) | |||
1412 | * The open entry point is called when a network interface is made | 1452 | * The open entry point is called when a network interface is made |
1413 | * active by the system (IFF_UP). At this point all resources needed | 1453 | * active by the system (IFF_UP). At this point all resources needed |
1414 | * for transmit and receive operations are allocated, the interrupt | 1454 | * for transmit and receive operations are allocated, the interrupt |
1415 | * handler is registered with the OS, the watchdog timer is started, | 1455 | * handler is registered with the OS (if needed), the watchdog timer |
1416 | * and the stack is notified that the interface is ready. | 1456 | * is started, and the stack is notified that the interface is ready. |
1417 | * | 1457 | * |
1418 | * If the actual link is not available yet, then we tell Linux that | 1458 | * If the actual link is not available yet, then we tell Linux that |
1419 | * we have no carrier, and we keep checking until the link comes up. | 1459 | * we have no carrier, and we keep checking until the link comes up. |
@@ -1468,6 +1508,10 @@ static int tile_net_open(struct net_device *dev) | |||
1468 | #endif | 1508 | #endif |
1469 | 1509 | ||
1470 | priv->partly_opened = 1; | 1510 | priv->partly_opened = 1; |
1511 | |||
1512 | } else { | ||
1513 | /* FIXME: Is this possible? */ | ||
1514 | /* printk("Already partly opened.\n"); */ | ||
1471 | } | 1515 | } |
1472 | 1516 | ||
1473 | /* | 1517 | /* |
@@ -1487,57 +1531,17 @@ static int tile_net_open(struct net_device *dev) | |||
1487 | * and then remember to try again later. | 1531 | * and then remember to try again later. |
1488 | */ | 1532 | */ |
1489 | netif_carrier_off(dev); | 1533 | netif_carrier_off(dev); |
1490 | schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, | 1534 | schedule_delayed_work(&priv->retry_work, TILE_NET_RETRY_INTERVAL); |
1491 | TILE_NET_RETRY_INTERVAL); | ||
1492 | 1535 | ||
1493 | return 0; | 1536 | return 0; |
1494 | } | 1537 | } |
1495 | 1538 | ||
1496 | 1539 | ||
1497 | /* | 1540 | static int tile_net_drain_lipp_buffers(struct tile_net_priv *priv) |
1498 | * Disables a network interface. | ||
1499 | * | ||
1500 | * Returns 0, this is not allowed to fail. | ||
1501 | * | ||
1502 | * The close entry point is called when an interface is de-activated | ||
1503 | * by the OS. The hardware is still under the drivers control, but | ||
1504 | * needs to be disabled. A global MAC reset is issued to stop the | ||
1505 | * hardware, and all transmit and receive resources are freed. | ||
1506 | * | ||
1507 | * ISSUE: Can this be called while "tile_net_poll()" is running? | ||
1508 | */ | ||
1509 | static int tile_net_stop(struct net_device *dev) | ||
1510 | { | 1541 | { |
1511 | struct tile_net_priv *priv = netdev_priv(dev); | 1542 | int n = 0; |
1512 | |||
1513 | bool pending = true; | ||
1514 | |||
1515 | PDEBUG("tile_net_stop()\n"); | ||
1516 | |||
1517 | /* ISSUE: Only needed if not yet fully open. */ | ||
1518 | cancel_delayed_work_sync(&priv->retry_work); | ||
1519 | |||
1520 | /* Can't transmit any more. */ | ||
1521 | netif_stop_queue(dev); | ||
1522 | |||
1523 | /* | ||
1524 | * Disable hypervisor interrupts on each tile. | ||
1525 | */ | ||
1526 | on_each_cpu(tile_net_disable_intr, (void *)dev, 1); | ||
1527 | |||
1528 | /* | ||
1529 | * Unregister the interrupt handler. | ||
1530 | * The __ffs() function returns the index into the interrupt handler | ||
1531 | * table from the interrupt bit mask which should have one bit | ||
1532 | * and one bit only set. | ||
1533 | */ | ||
1534 | if (priv->intr_id) | ||
1535 | free_irq(__ffs(priv->intr_id), dev); | ||
1536 | |||
1537 | /* | ||
1538 | * Drain all the LIPP buffers. | ||
1539 | */ | ||
1540 | 1543 | ||
1544 | /* Drain all the LIPP buffers. */ | ||
1541 | while (true) { | 1545 | while (true) { |
1542 | int buffer; | 1546 | int buffer; |
1543 | 1547 | ||
@@ -1560,43 +1564,105 @@ static int tile_net_stop(struct net_device *dev) | |||
1560 | 1564 | ||
1561 | kfree_skb(skb); | 1565 | kfree_skb(skb); |
1562 | } | 1566 | } |
1567 | |||
1568 | n++; | ||
1563 | } | 1569 | } |
1564 | 1570 | ||
1565 | /* Stop LIPP/LEPP. */ | 1571 | return n; |
1566 | tile_net_stop_aux(dev); | 1572 | } |
1567 | 1573 | ||
1568 | 1574 | ||
1569 | priv->fully_opened = 0; | 1575 | /* |
1576 | * Disables a network interface. | ||
1577 | * | ||
1578 | * Returns 0, this is not allowed to fail. | ||
1579 | * | ||
1580 | * The close entry point is called when an interface is de-activated | ||
1581 | * by the OS. The hardware is still under the drivers control, but | ||
1582 | * needs to be disabled. A global MAC reset is issued to stop the | ||
1583 | * hardware, and all transmit and receive resources are freed. | ||
1584 | * | ||
1585 | * ISSUE: How closely does "netif_running(dev)" mirror "priv->active"? | ||
1586 | * | ||
1587 | * Before we are called by "__dev_close()", "netif_running()" will | ||
1588 | * have been cleared, so no NEW calls to "tile_net_poll()" will be | ||
1589 | * made by "netpoll_poll_dev()". | ||
1590 | * | ||
1591 | * Often, this can cause some tiles to still have packets in their | ||
1592 | * queues, so we must call "tile_net_discard_packets()" later. | ||
1593 | * | ||
1594 | * Note that some other tile may still be INSIDE "tile_net_poll()", | ||
1595 | * and in fact, many will be, if there is heavy network load. | ||
1596 | * | ||
1597 | * Calling "on_each_cpu(tile_net_stop_disable, (void *)dev, 1)" when | ||
1598 | * any tile is still "napi_schedule()"'d will induce a horrible crash | ||
1599 | * when "msleep()" is called. This includes tiles which are inside | ||
1600 | * "tile_net_poll()" which have not yet called "napi_complete()". | ||
1601 | * | ||
1602 | * So, we must first try to wait long enough for other tiles to finish | ||
1603 | * with any current "tile_net_poll()" call, and, hopefully, to clear | ||
1604 | * the "scheduled" flag. ISSUE: It is unclear what happens to tiles | ||
1605 | * which have called "napi_schedule()" but which had not yet tried to | ||
1606 | * call "tile_net_poll()", or which exhausted their budget inside | ||
1607 | * "tile_net_poll()" just before this function was called. | ||
1608 | */ | ||
1609 | static int tile_net_stop(struct net_device *dev) | ||
1610 | { | ||
1611 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1612 | |||
1613 | PDEBUG("tile_net_stop()\n"); | ||
1570 | 1614 | ||
1615 | /* Start discarding packets. */ | ||
1616 | priv->active = false; | ||
1617 | |||
1618 | /* Make sure "active" is visible to all tiles. */ | ||
1619 | mb(); | ||
1571 | 1620 | ||
1572 | /* | 1621 | /* |
1573 | * XXX: ISSUE: It appears that, in practice anyway, by the | 1622 | * On each tile, make sure no NEW packets get delivered, and |
1574 | * time we get here, there are no pending completions. | 1623 | * disable the ingress interrupt. |
1624 | * | ||
1625 | * Note that the ingress interrupt can fire AFTER this, | ||
1626 | * presumably due to packets which were recently delivered, | ||
1627 | * but it will have no effect. | ||
1575 | */ | 1628 | */ |
1576 | while (pending) { | 1629 | on_each_cpu(tile_net_deregister, (void *)dev, 1); |
1577 | 1630 | ||
1578 | struct sk_buff *olds[32]; | 1631 | /* Optimistically drain LIPP buffers. */ |
1579 | unsigned int wanted = 32; | 1632 | (void)tile_net_drain_lipp_buffers(priv); |
1580 | unsigned int i, nolds = 0; | ||
1581 | 1633 | ||
1582 | nolds = tile_net_lepp_grab_comps(dev, olds, | 1634 | /* ISSUE: Only needed if not yet fully open. */ |
1583 | wanted, &pending); | 1635 | cancel_delayed_work_sync(&priv->retry_work); |
1584 | 1636 | ||
1585 | /* ISSUE: We have never actually seen this debug spew. */ | 1637 | /* Can't transmit any more. */ |
1586 | if (nolds != 0) | 1638 | netif_stop_queue(dev); |
1587 | pr_info("During tile_net_stop(), grabbed %d comps.\n", | ||
1588 | nolds); | ||
1589 | 1639 | ||
1590 | for (i = 0; i < nolds; i++) | 1640 | /* Disable NAPI on each tile. */ |
1591 | kfree_skb(olds[i]); | 1641 | on_each_cpu(tile_net_stop_disable, (void *)dev, 1); |
1592 | } | 1642 | |
1643 | /* | ||
1644 | * Drain any remaining LIPP buffers. NOTE: This "printk()" | ||
1645 | * has never been observed, but in theory it could happen. | ||
1646 | */ | ||
1647 | if (tile_net_drain_lipp_buffers(priv) != 0) | ||
1648 | printk("Had to drain some extra LIPP buffers!\n"); | ||
1593 | 1649 | ||
1650 | /* Stop LIPP/LEPP. */ | ||
1651 | tile_net_stop_aux(dev); | ||
1652 | |||
1653 | /* | ||
1654 | * ISSUE: It appears that, in practice anyway, by the time we | ||
1655 | * get here, there are no pending completions, but just in case, | ||
1656 | * we free (all of) them anyway. | ||
1657 | */ | ||
1658 | while (tile_net_lepp_free_comps(dev, true)) | ||
1659 | /* loop */; | ||
1594 | 1660 | ||
1595 | /* Wipe the EPP queue. */ | 1661 | /* Wipe the EPP queue. */ |
1596 | memset(priv->epp_queue, 0, sizeof(lepp_queue_t)); | 1662 | memset(priv->eq, 0, sizeof(lepp_queue_t)); |
1597 | 1663 | ||
1598 | /* Evict the EPP queue. */ | 1664 | /* Evict the EPP queue. */ |
1599 | finv_buffer(priv->epp_queue, PAGE_SIZE); | 1665 | finv_buffer(priv->eq, EQ_SIZE); |
1600 | 1666 | ||
1601 | return 0; | 1667 | return 0; |
1602 | } | 1668 | } |
@@ -1620,7 +1686,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, | |||
1620 | if (b_len != 0) { | 1686 | if (b_len != 0) { |
1621 | 1687 | ||
1622 | if (!hash_default) | 1688 | if (!hash_default) |
1623 | finv_buffer_remote(b_data, b_len); | 1689 | finv_buffer_remote(b_data, b_len, 0); |
1624 | 1690 | ||
1625 | cpa = __pa(b_data); | 1691 | cpa = __pa(b_data); |
1626 | frags[n].cpa_lo = cpa; | 1692 | frags[n].cpa_lo = cpa; |
@@ -1643,7 +1709,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, | |||
1643 | if (!hash_default) { | 1709 | if (!hash_default) { |
1644 | void *va = pfn_to_kaddr(pfn) + f->page_offset; | 1710 | void *va = pfn_to_kaddr(pfn) + f->page_offset; |
1645 | BUG_ON(PageHighMem(f->page)); | 1711 | BUG_ON(PageHighMem(f->page)); |
1646 | finv_buffer_remote(va, f->size); | 1712 | finv_buffer_remote(va, f->size, 0); |
1647 | } | 1713 | } |
1648 | 1714 | ||
1649 | cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; | 1715 | cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; |
@@ -1742,17 +1808,15 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) | |||
1742 | 1808 | ||
1743 | unsigned long irqflags; | 1809 | unsigned long irqflags; |
1744 | 1810 | ||
1745 | lepp_queue_t *eq = priv->epp_queue; | 1811 | lepp_queue_t *eq = priv->eq; |
1746 | 1812 | ||
1747 | struct sk_buff *olds[4]; | 1813 | struct sk_buff *olds[8]; |
1748 | unsigned int wanted = 4; | 1814 | unsigned int wanted = 8; |
1749 | unsigned int i, nolds = 0; | 1815 | unsigned int i, nolds = 0; |
1750 | 1816 | ||
1751 | unsigned int cmd_head, cmd_tail, cmd_next; | 1817 | unsigned int cmd_head, cmd_tail, cmd_next; |
1752 | unsigned int comp_tail; | 1818 | unsigned int comp_tail; |
1753 | 1819 | ||
1754 | unsigned int free_slots; | ||
1755 | |||
1756 | 1820 | ||
1757 | /* Paranoia. */ | 1821 | /* Paranoia. */ |
1758 | BUG_ON(skb->protocol != htons(ETH_P_IP)); | 1822 | BUG_ON(skb->protocol != htons(ETH_P_IP)); |
@@ -1780,34 +1844,32 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) | |||
1780 | 1844 | ||
1781 | /* Enqueue the command. */ | 1845 | /* Enqueue the command. */ |
1782 | 1846 | ||
1783 | spin_lock_irqsave(&priv->cmd_lock, irqflags); | 1847 | spin_lock_irqsave(&priv->eq_lock, irqflags); |
1784 | 1848 | ||
1785 | /* | 1849 | /* |
1786 | * Handle completions if needed to make room. | 1850 | * Handle completions if needed to make room. |
1787 | * HACK: Spin until there is sufficient room. | 1851 | * HACK: Spin until there is sufficient room. |
1788 | */ | 1852 | */ |
1789 | free_slots = lepp_num_free_comp_slots(eq); | 1853 | if (lepp_num_free_comp_slots(eq) == 0) { |
1790 | if (free_slots < 1) { | 1854 | nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0); |
1791 | spin: | 1855 | if (nolds == 0) { |
1792 | nolds += tile_net_lepp_grab_comps(dev, olds + nolds, | 1856 | busy: |
1793 | wanted - nolds, NULL); | 1857 | spin_unlock_irqrestore(&priv->eq_lock, irqflags); |
1794 | if (lepp_num_free_comp_slots(eq) < 1) | 1858 | return NETDEV_TX_BUSY; |
1795 | goto spin; | 1859 | } |
1796 | } | 1860 | } |
1797 | 1861 | ||
1798 | cmd_head = eq->cmd_head; | 1862 | cmd_head = eq->cmd_head; |
1799 | cmd_tail = eq->cmd_tail; | 1863 | cmd_tail = eq->cmd_tail; |
1800 | 1864 | ||
1801 | /* NOTE: The "gotos" below are untested. */ | ||
1802 | |||
1803 | /* Prepare to advance, detecting full queue. */ | 1865 | /* Prepare to advance, detecting full queue. */ |
1804 | cmd_next = cmd_tail + cmd_size; | 1866 | cmd_next = cmd_tail + cmd_size; |
1805 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) | 1867 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) |
1806 | goto spin; | 1868 | goto busy; |
1807 | if (cmd_next > LEPP_CMD_LIMIT) { | 1869 | if (cmd_next > LEPP_CMD_LIMIT) { |
1808 | cmd_next = 0; | 1870 | cmd_next = 0; |
1809 | if (cmd_next == cmd_head) | 1871 | if (cmd_next == cmd_head) |
1810 | goto spin; | 1872 | goto busy; |
1811 | } | 1873 | } |
1812 | 1874 | ||
1813 | /* Copy the command. */ | 1875 | /* Copy the command. */ |
@@ -1823,14 +1885,18 @@ spin: | |||
1823 | eq->comp_tail = comp_tail; | 1885 | eq->comp_tail = comp_tail; |
1824 | 1886 | ||
1825 | /* Flush before allowing LEPP to handle the command. */ | 1887 | /* Flush before allowing LEPP to handle the command. */ |
1888 | /* ISSUE: Is this the optimal location for the flush? */ | ||
1826 | __insn_mf(); | 1889 | __insn_mf(); |
1827 | 1890 | ||
1828 | eq->cmd_tail = cmd_tail; | 1891 | eq->cmd_tail = cmd_tail; |
1829 | 1892 | ||
1830 | spin_unlock_irqrestore(&priv->cmd_lock, irqflags); | 1893 | /* NOTE: Using "4" here is more efficient than "0" or "2", */ |
1831 | 1894 | /* and, strangely, more efficient than pre-checking the number */ | |
1895 | /* of available completions, and comparing it to 4. */ | ||
1832 | if (nolds == 0) | 1896 | if (nolds == 0) |
1833 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); | 1897 | nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 4); |
1898 | |||
1899 | spin_unlock_irqrestore(&priv->eq_lock, irqflags); | ||
1834 | 1900 | ||
1835 | /* Handle completions. */ | 1901 | /* Handle completions. */ |
1836 | for (i = 0; i < nolds; i++) | 1902 | for (i = 0; i < nolds; i++) |
@@ -1870,10 +1936,10 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | |||
1870 | 1936 | ||
1871 | unsigned int num_frags; | 1937 | unsigned int num_frags; |
1872 | 1938 | ||
1873 | lepp_queue_t *eq = priv->epp_queue; | 1939 | lepp_queue_t *eq = priv->eq; |
1874 | 1940 | ||
1875 | struct sk_buff *olds[4]; | 1941 | struct sk_buff *olds[8]; |
1876 | unsigned int wanted = 4; | 1942 | unsigned int wanted = 8; |
1877 | unsigned int i, nolds = 0; | 1943 | unsigned int i, nolds = 0; |
1878 | 1944 | ||
1879 | unsigned int cmd_size = sizeof(lepp_cmd_t); | 1945 | unsigned int cmd_size = sizeof(lepp_cmd_t); |
@@ -1883,8 +1949,6 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | |||
1883 | 1949 | ||
1884 | lepp_cmd_t cmds[LEPP_MAX_FRAGS]; | 1950 | lepp_cmd_t cmds[LEPP_MAX_FRAGS]; |
1885 | 1951 | ||
1886 | unsigned int free_slots; | ||
1887 | |||
1888 | 1952 | ||
1889 | /* | 1953 | /* |
1890 | * This is paranoia, since we think that if the link doesn't come | 1954 | * This is paranoia, since we think that if the link doesn't come |
@@ -1905,7 +1969,8 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | |||
1905 | if (hash_default) { | 1969 | if (hash_default) { |
1906 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data); | 1970 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data); |
1907 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) | 1971 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) |
1908 | panic("Non-coherent egress buffer!"); | 1972 | panic("Non-HFH egress buffer! VA=%p Mode=%d PTE=%llx", |
1973 | data, hv_pte_get_mode(pte), hv_pte_val(pte)); | ||
1909 | } | 1974 | } |
1910 | #endif | 1975 | #endif |
1911 | #endif | 1976 | #endif |
@@ -1958,37 +2023,35 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | |||
1958 | 2023 | ||
1959 | /* Enqueue the commands. */ | 2024 | /* Enqueue the commands. */ |
1960 | 2025 | ||
1961 | spin_lock_irqsave(&priv->cmd_lock, irqflags); | 2026 | spin_lock_irqsave(&priv->eq_lock, irqflags); |
1962 | 2027 | ||
1963 | /* | 2028 | /* |
1964 | * Handle completions if needed to make room. | 2029 | * Handle completions if needed to make room. |
1965 | * HACK: Spin until there is sufficient room. | 2030 | * HACK: Spin until there is sufficient room. |
1966 | */ | 2031 | */ |
1967 | free_slots = lepp_num_free_comp_slots(eq); | 2032 | if (lepp_num_free_comp_slots(eq) == 0) { |
1968 | if (free_slots < 1) { | 2033 | nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0); |
1969 | spin: | 2034 | if (nolds == 0) { |
1970 | nolds += tile_net_lepp_grab_comps(dev, olds + nolds, | 2035 | busy: |
1971 | wanted - nolds, NULL); | 2036 | spin_unlock_irqrestore(&priv->eq_lock, irqflags); |
1972 | if (lepp_num_free_comp_slots(eq) < 1) | 2037 | return NETDEV_TX_BUSY; |
1973 | goto spin; | 2038 | } |
1974 | } | 2039 | } |
1975 | 2040 | ||
1976 | cmd_head = eq->cmd_head; | 2041 | cmd_head = eq->cmd_head; |
1977 | cmd_tail = eq->cmd_tail; | 2042 | cmd_tail = eq->cmd_tail; |
1978 | 2043 | ||
1979 | /* NOTE: The "gotos" below are untested. */ | ||
1980 | |||
1981 | /* Copy the commands, or fail. */ | 2044 | /* Copy the commands, or fail. */ |
1982 | for (i = 0; i < num_frags; i++) { | 2045 | for (i = 0; i < num_frags; i++) { |
1983 | 2046 | ||
1984 | /* Prepare to advance, detecting full queue. */ | 2047 | /* Prepare to advance, detecting full queue. */ |
1985 | cmd_next = cmd_tail + cmd_size; | 2048 | cmd_next = cmd_tail + cmd_size; |
1986 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) | 2049 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) |
1987 | goto spin; | 2050 | goto busy; |
1988 | if (cmd_next > LEPP_CMD_LIMIT) { | 2051 | if (cmd_next > LEPP_CMD_LIMIT) { |
1989 | cmd_next = 0; | 2052 | cmd_next = 0; |
1990 | if (cmd_next == cmd_head) | 2053 | if (cmd_next == cmd_head) |
1991 | goto spin; | 2054 | goto busy; |
1992 | } | 2055 | } |
1993 | 2056 | ||
1994 | /* Copy the command. */ | 2057 | /* Copy the command. */ |
@@ -2005,14 +2068,18 @@ spin: | |||
2005 | eq->comp_tail = comp_tail; | 2068 | eq->comp_tail = comp_tail; |
2006 | 2069 | ||
2007 | /* Flush before allowing LEPP to handle the command. */ | 2070 | /* Flush before allowing LEPP to handle the command. */ |
2071 | /* ISSUE: Is this the optimal location for the flush? */ | ||
2008 | __insn_mf(); | 2072 | __insn_mf(); |
2009 | 2073 | ||
2010 | eq->cmd_tail = cmd_tail; | 2074 | eq->cmd_tail = cmd_tail; |
2011 | 2075 | ||
2012 | spin_unlock_irqrestore(&priv->cmd_lock, irqflags); | 2076 | /* NOTE: Using "4" here is more efficient than "0" or "2", */ |
2013 | 2077 | /* and, strangely, more efficient than pre-checking the number */ | |
2078 | /* of available completions, and comparing it to 4. */ | ||
2014 | if (nolds == 0) | 2079 | if (nolds == 0) |
2015 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); | 2080 | nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 4); |
2081 | |||
2082 | spin_unlock_irqrestore(&priv->eq_lock, irqflags); | ||
2016 | 2083 | ||
2017 | /* Handle completions. */ | 2084 | /* Handle completions. */ |
2018 | for (i = 0; i < nolds; i++) | 2085 | for (i = 0; i < nolds; i++) |
@@ -2261,7 +2328,6 @@ static struct net_device *tile_net_dev_init(const char *name) | |||
2261 | int ret; | 2328 | int ret; |
2262 | struct net_device *dev; | 2329 | struct net_device *dev; |
2263 | struct tile_net_priv *priv; | 2330 | struct tile_net_priv *priv; |
2264 | struct page *page; | ||
2265 | 2331 | ||
2266 | /* | 2332 | /* |
2267 | * Allocate the device structure. This allocates "priv", calls | 2333 | * Allocate the device structure. This allocates "priv", calls |
@@ -2285,23 +2351,21 @@ static struct net_device *tile_net_dev_init(const char *name) | |||
2285 | 2351 | ||
2286 | INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry); | 2352 | INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry); |
2287 | 2353 | ||
2288 | spin_lock_init(&priv->cmd_lock); | 2354 | spin_lock_init(&priv->eq_lock); |
2289 | spin_lock_init(&priv->comp_lock); | ||
2290 | 2355 | ||
2291 | /* Allocate "epp_queue". */ | 2356 | /* Allocate "eq". */ |
2292 | BUG_ON(get_order(sizeof(lepp_queue_t)) != 0); | 2357 | priv->eq_pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, EQ_ORDER); |
2293 | page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); | 2358 | if (!priv->eq_pages) { |
2294 | if (!page) { | ||
2295 | free_netdev(dev); | 2359 | free_netdev(dev); |
2296 | return NULL; | 2360 | return NULL; |
2297 | } | 2361 | } |
2298 | priv->epp_queue = page_address(page); | 2362 | priv->eq = page_address(priv->eq_pages); |
2299 | 2363 | ||
2300 | /* Register the network device. */ | 2364 | /* Register the network device. */ |
2301 | ret = register_netdev(dev); | 2365 | ret = register_netdev(dev); |
2302 | if (ret) { | 2366 | if (ret) { |
2303 | pr_err("register_netdev %s failed %d\n", dev->name, ret); | 2367 | pr_err("register_netdev %s failed %d\n", dev->name, ret); |
2304 | free_page((unsigned long)priv->epp_queue); | 2368 | __free_pages(priv->eq_pages, EQ_ORDER); |
2305 | free_netdev(dev); | 2369 | free_netdev(dev); |
2306 | return NULL; | 2370 | return NULL; |
2307 | } | 2371 | } |
@@ -2310,7 +2374,7 @@ static struct net_device *tile_net_dev_init(const char *name) | |||
2310 | ret = tile_net_get_mac(dev); | 2374 | ret = tile_net_get_mac(dev); |
2311 | if (ret < 0) { | 2375 | if (ret < 0) { |
2312 | unregister_netdev(dev); | 2376 | unregister_netdev(dev); |
2313 | free_page((unsigned long)priv->epp_queue); | 2377 | __free_pages(priv->eq_pages, EQ_ORDER); |
2314 | free_netdev(dev); | 2378 | free_netdev(dev); |
2315 | return NULL; | 2379 | return NULL; |
2316 | } | 2380 | } |
@@ -2321,6 +2385,9 @@ static struct net_device *tile_net_dev_init(const char *name) | |||
2321 | 2385 | ||
2322 | /* | 2386 | /* |
2323 | * Module cleanup. | 2387 | * Module cleanup. |
2388 | * | ||
2389 | * FIXME: If compiled as a module, this module cannot be "unloaded", | ||
2390 | * because the "ingress interrupt handler" is registered permanently. | ||
2324 | */ | 2391 | */ |
2325 | static void tile_net_cleanup(void) | 2392 | static void tile_net_cleanup(void) |
2326 | { | 2393 | { |
@@ -2331,8 +2398,8 @@ static void tile_net_cleanup(void) | |||
2331 | struct net_device *dev = tile_net_devs[i]; | 2398 | struct net_device *dev = tile_net_devs[i]; |
2332 | struct tile_net_priv *priv = netdev_priv(dev); | 2399 | struct tile_net_priv *priv = netdev_priv(dev); |
2333 | unregister_netdev(dev); | 2400 | unregister_netdev(dev); |
2334 | finv_buffer(priv->epp_queue, PAGE_SIZE); | 2401 | finv_buffer(priv->eq, EQ_SIZE); |
2335 | free_page((unsigned long)priv->epp_queue); | 2402 | __free_pages(priv->eq_pages, EQ_ORDER); |
2336 | free_netdev(dev); | 2403 | free_netdev(dev); |
2337 | } | 2404 | } |
2338 | } | 2405 | } |
@@ -2355,7 +2422,12 @@ static int tile_net_init_module(void) | |||
2355 | } | 2422 | } |
2356 | 2423 | ||
2357 | 2424 | ||
2425 | module_init(tile_net_init_module); | ||
2426 | module_exit(tile_net_cleanup); | ||
2427 | |||
2428 | |||
2358 | #ifndef MODULE | 2429 | #ifndef MODULE |
2430 | |||
2359 | /* | 2431 | /* |
2360 | * The "network_cpus" boot argument specifies the cpus that are dedicated | 2432 | * The "network_cpus" boot argument specifies the cpus that are dedicated |
2361 | * to handle ingress packets. | 2433 | * to handle ingress packets. |
@@ -2391,8 +2463,5 @@ static int __init network_cpus_setup(char *str) | |||
2391 | return 0; | 2463 | return 0; |
2392 | } | 2464 | } |
2393 | __setup("network_cpus=", network_cpus_setup); | 2465 | __setup("network_cpus=", network_cpus_setup); |
2394 | #endif | ||
2395 | |||
2396 | 2466 | ||
2397 | module_init(tile_net_init_module); | 2467 | #endif |
2398 | module_exit(tile_net_cleanup); | ||