author	Eric Dumazet <edumazet@google.com>	2014-11-02 09:19:33 -0500
committer	David S. Miller <davem@davemloft.net>	2014-11-03 12:25:09 -0500
commit	d75b1ade567ffab085e8adbbdacf0092d10cd09c (patch)
tree	4e5f935da32ff016c66597727d09b3eb4a1ee6c3 /net
parent	4cdb1e2e3d3495423db558d3bb7ed11d66aabce7 (diff)
net: less interrupt masking in NAPI
net_rx_action() can mask irqs a single time to transfer sd->poll_list into a private list, for a very short duration.

Then, napi_complete() can avoid masking irqs again, and net_rx_action() only needs to mask irqs again in the slow path.

This patch removes two pairs of irq mask/unmask per typical NAPI run, more if multiple NAPI instances were triggered.

Note this also allows giving control back to the caller (do_softirq()) more often, so that other softirq handlers can be called a bit earlier, or ksoftirqd can be woken up earlier under pressure.

This was developed while testing an alternative to RX interrupt mitigation to reduce latencies while keeping or improving GRO aggregation on fast NICs.

The idea is to test napi->gro_list at the end of a napi->poll() and reschedule one NAPI poll, but only after servicing a full round of softirqs (timers, TX, rcu, ...). This would be allowed only if the softirq is currently serviced by the idle task or by ksoftirqd, and no resched is needed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
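The central change is a detach/process/requeue pattern in net_rx_action(): sd->poll_list is spliced into a private list inside one short irq-off window, the private list is walked with irqs enabled, and irqs are masked again only on the slow path where entries must be requeued. Below is a minimal userspace sketch of that pattern, assuming a toy singly-linked list, a pthread mutex standing in for local_irq_disable()/local_irq_enable(), and a hypothetical budget_cost field marking nodes that need another pass; none of these names come from the kernel.

#include <pthread.h>
#include <stdio.h>

struct node {
	struct node *next;
	int budget_cost;	/* >1 means "consumed full weight": repoll */
};

static struct node *shared_head;	/* stand-in for sd->poll_list */
/* stand-in for local_irq_disable()/local_irq_enable() */
static pthread_mutex_t shared_lock = PTHREAD_MUTEX_INITIALIZER;

static void process_all(void)
{
	struct node *list, *repoll = NULL, *n;

	/* One short critical section to detach the whole shared list,
	 * mirroring list_splice_init(&sd->poll_list, &list). */
	pthread_mutex_lock(&shared_lock);
	list = shared_head;
	shared_head = NULL;
	pthread_mutex_unlock(&shared_lock);

	/* Walk the private list with the lock dropped. */
	while ((n = list) != NULL) {
		list = n->next;
		printf("polled node costing %d\n", n->budget_cost);
		if (n->budget_cost > 1) {
			/* Needs another pass: park it on a private
			 * repoll list, as the patch does. */
			n->next = repoll;
			repoll = n;
		}
	}

	/* Slow path only: requeue leftovers under the lock again. */
	if (repoll != NULL) {
		pthread_mutex_lock(&shared_lock);
		for (n = repoll; n->next != NULL; n = n->next)
			;
		n->next = shared_head;
		shared_head = repoll;
		pthread_mutex_unlock(&shared_lock);
	}
}

int main(void)
{
	struct node b = { NULL, 2 }, a = { &b, 1 };

	shared_head = &a;
	process_all();	/* polls a and b; b is requeued */
	process_all();	/* polls b again (it keeps requeuing in this toy) */
	return 0;
}

The shape matters more than the details: in the common case, where every node finishes its work, the lock is taken exactly once per run instead of once per node.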
Diffstat (limited to 'net')
-rw-r--r--	net/core/dev.c	68
1 file changed, 43 insertions(+), 25 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index ebf778df58cd..40be481268de 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4316,20 +4316,28 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
 	local_irq_enable();
 }
 
+static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	return sd->rps_ipi_list != NULL;
+#else
+	return false;
+#endif
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
-#ifdef CONFIG_RPS
 	/* Check if we have pending ipi, its better to send them now,
 	 * not waiting net_rx_action() end.
 	 */
-	if (sd->rps_ipi_list) {
+	if (sd_has_rps_ipi_waiting(sd)) {
 		local_irq_disable();
 		net_rps_action_and_irq_enable(sd);
 	}
-#endif
+
 	napi->weight = weight_p;
 	local_irq_disable();
 	while (1) {
@@ -4356,7 +4364,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			 * We can use a plain write instead of clear_bit(),
 			 * and we dont need an smp_mb() memory barrier.
 			 */
-			list_del(&napi->poll_list);
 			napi->state = 0;
 			rps_unlock(sd);
 
@@ -4406,7 +4413,7 @@ void __napi_complete(struct napi_struct *n)
 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 	BUG_ON(n->gro_list);
 
-	list_del(&n->poll_list);
+	list_del_init(&n->poll_list);
 	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
 }
@@ -4424,9 +4431,15 @@ void napi_complete(struct napi_struct *n)
 		return;
 
 	napi_gro_flush(n, false);
-	local_irq_save(flags);
-	__napi_complete(n);
-	local_irq_restore(flags);
+
+	if (likely(list_empty(&n->poll_list))) {
+		WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+	} else {
+		/* If n->poll_list is not empty, we need to mask irqs */
+		local_irq_save(flags);
+		__napi_complete(n);
+		local_irq_restore(flags);
+	}
 }
 EXPORT_SYMBOL(napi_complete);
 
@@ -4520,29 +4533,28 @@ static void net_rx_action(struct softirq_action *h)
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
+	LIST_HEAD(list);
+	LIST_HEAD(repoll);
 	void *have;
 
 	local_irq_disable();
+	list_splice_init(&sd->poll_list, &list);
+	local_irq_enable();
 
-	while (!list_empty(&sd->poll_list)) {
+	while (!list_empty(&list)) {
 		struct napi_struct *n;
 		int work, weight;
 
-		/* If softirq window is exhuasted then punt.
+		/* If softirq window is exhausted then punt.
 		 * Allow this to run for 2 jiffies since which will allow
 		 * an average latency of 1.5/HZ.
 		 */
 		if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
 			goto softnet_break;
 
-		local_irq_enable();
 
-		/* Even though interrupts have been re-enabled, this
-		 * access is safe because interrupts can only add new
-		 * entries to the tail of this list, and only ->poll()
-		 * calls can remove this head entry from the list.
-		 */
-		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
+		n = list_first_entry(&list, struct napi_struct, poll_list);
+		list_del_init(&n->poll_list);
 
 		have = netpoll_poll_lock(n);
 
@@ -4564,8 +4576,6 @@ static void net_rx_action(struct softirq_action *h)
 
 		budget -= work;
 
-		local_irq_disable();
-
 		/* Drivers must not modify the NAPI state if they
 		 * consume the entire weight. In such cases this code
 		 * still "owns" the NAPI instance and therefore can
@@ -4573,32 +4583,40 @@ static void net_rx_action(struct softirq_action *h)
 		 */
 		if (unlikely(work == weight)) {
 			if (unlikely(napi_disable_pending(n))) {
-				local_irq_enable();
 				napi_complete(n);
-				local_irq_disable();
 			} else {
 				if (n->gro_list) {
 					/* flush too old packets
 					 * If HZ < 1000, flush all packets.
 					 */
-					local_irq_enable();
 					napi_gro_flush(n, HZ >= 1000);
-					local_irq_disable();
 				}
-				list_move_tail(&n->poll_list, &sd->poll_list);
+				list_add_tail(&n->poll_list, &repoll);
 			}
 		}
 
 		netpoll_poll_unlock(have);
 	}
+
+	if (!sd_has_rps_ipi_waiting(sd) &&
+	    list_empty(&list) &&
+	    list_empty(&repoll))
+		return;
 out:
+	local_irq_disable();
+
+	list_splice_tail_init(&sd->poll_list, &list);
+	list_splice_tail(&repoll, &list);
+	list_splice(&list, &sd->poll_list);
+	if (!list_empty(&sd->poll_list))
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+
 	net_rps_action_and_irq_enable(sd);
 
 	return;
 
 softnet_break:
 	sd->time_squeeze++;
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	goto out;
 }
 
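The three splices at the new out: label deserve a closer look. list_splice_tail_init(&sd->poll_list, &list) picks up any NAPIs that interrupts queued on sd->poll_list while the loop ran with irqs enabled; list_splice_tail(&repoll, &list) appends the instances that consumed their full weight; and list_splice(&list, &sd->poll_list) installs the merged batch back on the per-cpu list (empty at that point, since irqs are masked), so interrupt-queued NAPIs are serviced before re-polled ones. The following standalone sketch re-implements just enough of the <linux/list.h> primitives to show the resulting order; the function names mirror the kernel API, but the bodies are userspace approximations, and struct napi and the node names here are hypothetical.

#include <stdio.h>

struct list_head {
	struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void INIT_LIST_HEAD(struct list_head *h)
{
	h->next = h;
	h->prev = h;
}

static int list_empty(const struct list_head *h)
{
	return h->next == h;
}

static void __list_add(struct list_head *n, struct list_head *prev,
		       struct list_head *next)
{
	next->prev = n;
	n->next = next;
	n->prev = prev;
	prev->next = n;
}

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	__list_add(n, h->prev, h);
}

static void __list_splice(const struct list_head *list, struct list_head *prev,
			  struct list_head *next)
{
	struct list_head *first = list->next;
	struct list_head *last = list->prev;

	first->prev = prev;
	prev->next = first;
	last->next = next;
	next->prev = last;
}

/* Insert @list at the head of @head. */
static void list_splice(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head, head->next);
}

/* Append @list at the tail of @head. */
static void list_splice_tail(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head->prev, head);
}

/* Like list_splice_tail(), but also reinitialize @list to empty. */
static void list_splice_tail_init(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list)) {
		__list_splice(list, head->prev, head);
		INIT_LIST_HEAD(list);
	}
}

struct napi {
	struct list_head poll_list;	/* first member, so a cast suffices */
	const char *name;
};

int main(void)
{
	struct list_head poll_list = LIST_HEAD_INIT(poll_list); /* sd->poll_list */
	struct list_head list = LIST_HEAD_INIT(list);	/* already drained */
	struct list_head repoll = LIST_HEAD_INIT(repoll);
	struct napi newly = { LIST_HEAD_INIT(newly.poll_list), "queued-by-irq" };
	struct napi again = { LIST_HEAD_INIT(again.poll_list), "needs-repoll" };
	struct list_head *pos;

	list_add_tail(&newly.poll_list, &poll_list); /* arrived mid-loop */
	list_add_tail(&again.poll_list, &repoll);    /* consumed full weight */

	/* The three splices from the patched out: label. */
	list_splice_tail_init(&poll_list, &list);
	list_splice_tail(&repoll, &list);
	list_splice(&list, &poll_list);

	for (pos = poll_list.next; pos != &poll_list; pos = pos->next)
		printf("%s\n", ((struct napi *)pos)->name);
	return 0;
}

Running this prints "queued-by-irq" before "needs-repoll", which is the fairness property the patch preserves: work queued by interrupts during the run is not starved by devices that keep consuming their full weight.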