nvdla: kmd: synchronize access to task pool memory

[1] In the absence of synchronization on access to the task pool,
    submission may happen at a faster rate than cleanup. This leads to
    frequent out-of-memory errors.
[2] This commit fixes the issue by retrying before reporting OOM.
    a. Sets up communication between cleanup & submit.
    b. Retries allocating task memory
       - Retry period: 1 ms
       - Total timeout: 10 ms

Bug 200680501

Change-Id: Iea25fbe6e7891938f6318285d4824bc1c54ddb05
Signed-off-by: Arvind M <am@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2518666
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
author: Arvind M <am@nvidia.com> 2021-04-21 03:18:07 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2021-04-22 22:10:49 -0400
commit: c852734bc80c8262f81a0ca666fe444c59d7189f (patch)
tree: 04bfc03eb003e66a18f79ddcd465d75654a3e2fb /drivers/video/tegra
parent: 31f770700e4b7fc80fae72a670d3d482727887f1 (diff)
3 files changed, 40 insertions, 7 deletions
diff --git a/drivers/video/tegra/host/nvdla/dla_queue.c b/drivers/video/tegra/host/nvdla/dla_queue.c
index 33fa3aae4..ed3d685e1 100644
--- a/drivers/video/tegra/host/nvdla/dla_queue.c
+++ b/drivers/video/tegra/host/nvdla/dla_queue.c
@@ -1,7 +1,7 @@
 /*
 * NVDLA queue management
 *
- * Copyright (c) 2019-2020, NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2019-2021, NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -46,8 +46,9 @@
 * lock                 Mutex lock for the array access.
 * alloc_table          Keep track of the index being assigned
 *                      and freed for a task
- * max_task_cnt         Maximum task count that can be supported.
+ * max_task_cnt Maximum task count that can be supported.
- *
+ * cleanup_done Completion status of cleanup wait.
+ * cleanup_wait Records wait for cleanup action.
 */
 struct nvdla_queue_task_pool {
@@ -58,6 +59,9 @@ struct nvdla_queue_task_pool {
        unsigned long alloc_table;
        unsigned long max_task_cnt;
+        struct completion cleanup_done;
+        int cleanup_wait;
 };
 static int nvdla_queue_task_pool_alloc(struct platform_device *pdev,
@@ -96,6 +100,9 @@ static int nvdla_queue_task_pool_alloc(struct platform_device *pdev,
        mutex_init(&task_pool->lock);
+        init_completion(&task_pool->cleanup_done);
+        task_pool->cleanup_wait = 0;
        return err;
 err_alloc_task_pool:
@@ -549,6 +556,18 @@ int nvdla_queue_alloc_task_memory(
        struct nvdla_queue_task_pool *task_pool =
                (struct nvdla_queue_task_pool *)queue->task_pool;
+        if (task_pool->cleanup_wait == 1) {
+                unsigned long timeout =
+                        msecs_to_jiffies(NVDLA_TASK_MEM_AVAIL_RETRY_PERIOD);
+                /**
+                 * Error intentionally ignored to be captured as part of
+                 * out-of-range index during allocation.
+                 **/
+                (void) wait_for_completion_timeout(&task_pool->cleanup_done,
+                                timeout);
+        }
        mutex_lock(&task_pool->lock);
        index = find_first_zero_bit(&task_pool->alloc_table,
@@ -556,8 +575,8 @@ int nvdla_queue_alloc_task_memory(
        /* quit if pre-allocated task array is not free */
        if (index >= task_pool->max_task_cnt) {
-                dev_err(&pdev->dev,
+                dev_warn(&pdev->dev, "failed to get Task Pool Memory\n");
-                                "failed to get Task Pool Memory\n");
+                task_pool->cleanup_wait = 1; // wait for cleanup
                err = -EAGAIN;
                goto err_alloc_task_mem;
        }
@@ -596,5 +615,10 @@ void nvdla_queue_free_task_memory(struct nvdla_queue *queue, int index)
        mutex_lock(&task_pool->lock);
        clear_bit(index, &task_pool->alloc_table);
+        if (task_pool->cleanup_wait == 1) {
+                task_pool->cleanup_wait = 0;
+                complete(&task_pool->cleanup_done);
+        }
        mutex_unlock(&task_pool->lock);
 }
diff --git a/drivers/video/tegra/host/nvdla/dla_queue.h b/drivers/video/tegra/host/nvdla/dla_queue.h
index 1f5801c66..8bded0f65 100644
--- a/drivers/video/tegra/host/nvdla/dla_queue.h
+++ b/drivers/video/tegra/host/nvdla/dla_queue.h
@@ -1,7 +1,7 @@
 /*
 * NVHOST Queue management header for T194
 *
- * Copyright (c) 2016-2017, NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2016-2021, NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -21,6 +21,9 @@
 #include <linux/kref.h>
+#define NVDLA_TASK_MEM_AVAIL_TIMEOUT_MS 10  /* 10 ms */
+#define NVDLA_TASK_MEM_AVAIL_RETRY_PERIOD 1 /* 1 ms */
 struct nvdla_queue_task_pool;
 /**
diff --git a/drivers/video/tegra/host/nvdla/nvdla_queue.c b/drivers/video/tegra/host/nvdla/nvdla_queue.c
index 5c7bddd4b..ccb182b24 100644
--- a/drivers/video/tegra/host/nvdla/nvdla_queue.c
+++ b/drivers/video/tegra/host/nvdla/nvdla_queue.c
@@ -88,11 +88,17 @@ int nvdla_get_task_mem(struct nvdla_queue *queue,
        struct nvdla_task *task = NULL;
        struct nvdla_queue_task_mem_info task_mem_info;
        struct platform_device *pdev = queue->pool->pdev;
+        int n_retries = (NVDLA_TASK_MEM_AVAIL_TIMEOUT_MS /
+                                        NVDLA_TASK_MEM_AVAIL_RETRY_PERIOD);
        nvdla_dbg_fn(pdev, "");
        /* get mem task descriptor and task mem from task_mem_pool */
-        err = nvdla_queue_alloc_task_memory(queue, &task_mem_info);
+        do {
+                n_retries = n_retries - 1;
+                err = nvdla_queue_alloc_task_memory(queue, &task_mem_info);
+        } while ((n_retries > 0) && (err == -EAGAIN));
        task = task_mem_info.kmem_addr;
        if ((err < 0) || !task)
                goto fail_to_assign_pool;
author	Arvind M <am@nvidia.com>	2021-04-21 03:18:07 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2021-04-22 22:10:49 -0400
commit	c852734bc80c8262f81a0ca666fe444c59d7189f (patch)
tree	04bfc03eb003e66a18f79ddcd465d75654a3e2fb /drivers/video/tegra
parent	31f770700e4b7fc80fae72a670d3d482727887f1 (diff)