aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNetanel Belgazal <netanel@amazon.com>2017-06-11 08:42:48 -0400
committerDavid S. Miller <davem@davemloft.net>2017-06-11 16:36:46 -0400
commita3af7c18cfe545a711e5df7491b7d6df71eba2ff (patch)
treeeb7c182c442a68f6c11d4b675df977ceb134b240
parent0857d92f71b6cb75281fde913554b2d5436c394b (diff)
net: ena: fix theoretical Rx hang on low memory systems
In the rare case where the device runs out of free Rx buffer descriptors (e.g. under pressure on kernel memory) and the napi handler continuously fails to refill new Rx descriptors, the device Rx queue eventually runs out of all free Rx buffers in which to post incoming packets, leading to a deadlock: * The device won't send interrupts since all the new Rx packets will be dropped. * The napi handler won't try to allocate new Rx descriptors since allocation is part of NAPI, which is not being invoked any more. The fix involves detecting this scenario and rescheduling NAPI (to refill buffers) from the keepalive/watchdog task. Fixes: 1738cd3ed342 ("Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Netanel Belgazal <netanel@amazon.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_ethtool.c1
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c55
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.h2
3 files changed, 58 insertions, 0 deletions
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 67b2338f8fb3..533b2fbdeef1 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -94,6 +94,7 @@ static const struct ena_stats ena_stats_rx_strings[] = {
94 ENA_STAT_RX_ENTRY(dma_mapping_err), 94 ENA_STAT_RX_ENTRY(dma_mapping_err),
95 ENA_STAT_RX_ENTRY(bad_desc_num), 95 ENA_STAT_RX_ENTRY(bad_desc_num),
96 ENA_STAT_RX_ENTRY(rx_copybreak_pkt), 96 ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
97 ENA_STAT_RX_ENTRY(empty_rx_ring),
97}; 98};
98 99
99static const struct ena_stats ena_stats_ena_com_strings[] = { 100static const struct ena_stats ena_stats_ena_com_strings[] = {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 4e9fbddd3b47..3c366bfbbab1 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
190 rxr->sgl_size = adapter->max_rx_sgl_size; 190 rxr->sgl_size = adapter->max_rx_sgl_size;
191 rxr->smoothed_interval = 191 rxr->smoothed_interval =
192 ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); 192 ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
193 rxr->empty_rx_queue = 0;
193 } 194 }
194} 195}
195 196
@@ -2619,6 +2620,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
2619 adapter->last_monitored_tx_qid = i % adapter->num_queues; 2620 adapter->last_monitored_tx_qid = i % adapter->num_queues;
2620} 2621}
2621 2622
2623/* trigger napi schedule after 2 consecutive detections */
2624#define EMPTY_RX_REFILL 2
2625/* For the rare case where the device runs out of Rx descriptors and the
2626 * napi handler failed to refill new Rx descriptors (due to a lack of memory
2627 * for example).
2628 * This case will lead to a deadlock:
2629 * The device won't send interrupts since all the new Rx packets will be dropped
2630 * The napi handler won't run, so it won't allocate the new Rx descriptors
2631 * that the device needs in order to deliver new packets.
2632 *
2633 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
2634 * (It is recommended to have at least 512MB, with a minimum of 128MB for
2635 * constrained environments.)
2636 *
2637 * When such a situation is detected - Reschedule napi
2638 */
2639static void check_for_empty_rx_ring(struct ena_adapter *adapter)
2640{
2641 struct ena_ring *rx_ring;
2642 int i, refill_required;
2643
2644 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2645 return;
2646
2647 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2648 return;
2649
2650 for (i = 0; i < adapter->num_queues; i++) {
2651 rx_ring = &adapter->rx_ring[i];
2652
2653 refill_required =
2654 ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
2655 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
2656 rx_ring->empty_rx_queue++;
2657
2658 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
2659 u64_stats_update_begin(&rx_ring->syncp);
2660 rx_ring->rx_stats.empty_rx_ring++;
2661 u64_stats_update_end(&rx_ring->syncp);
2662
2663 netif_err(adapter, drv, adapter->netdev,
2664 "trigger refill for ring %d\n", i);
2665
2666 napi_schedule(rx_ring->napi);
2667 rx_ring->empty_rx_queue = 0;
2668 }
2669 } else {
2670 rx_ring->empty_rx_queue = 0;
2671 }
2672 }
2673}
2674
2622/* Check for keep alive expiration */ 2675/* Check for keep alive expiration */
2623static void check_for_missing_keep_alive(struct ena_adapter *adapter) 2676static void check_for_missing_keep_alive(struct ena_adapter *adapter)
2624{ 2677{
@@ -2673,6 +2726,8 @@ static void ena_timer_service(unsigned long data)
2673 2726
2674 check_for_missing_tx_completions(adapter); 2727 check_for_missing_tx_completions(adapter);
2675 2728
2729 check_for_empty_rx_ring(adapter);
2730
2676 if (debug_area) 2731 if (debug_area)
2677 ena_dump_stats_to_buf(adapter, debug_area); 2732 ena_dump_stats_to_buf(adapter, debug_area);
2678 2733
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 0e22bce6239d..8828f1d6dd22 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -184,6 +184,7 @@ struct ena_stats_rx {
184 u64 dma_mapping_err; 184 u64 dma_mapping_err;
185 u64 bad_desc_num; 185 u64 bad_desc_num;
186 u64 rx_copybreak_pkt; 186 u64 rx_copybreak_pkt;
187 u64 empty_rx_ring;
187}; 188};
188 189
189struct ena_ring { 190struct ena_ring {
@@ -231,6 +232,7 @@ struct ena_ring {
231 struct ena_stats_tx tx_stats; 232 struct ena_stats_tx tx_stats;
232 struct ena_stats_rx rx_stats; 233 struct ena_stats_rx rx_stats;
233 }; 234 };
235 int empty_rx_queue;
234} ____cacheline_aligned; 236} ____cacheline_aligned;
235 237
236struct ena_stats_dev { 238struct ena_stats_dev {