author	Sourab Gupta <sourabg@nvidia.com>	2018-04-30 06:23:22 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-05-07 07:42:29 -0400
commit	2b498cdf8aaa6c628cc1ac8e2b0b3a582c0decb3 (patch)
tree	7646f75c290fab717d674ebc9b6d6b870146523f /drivers/gpu/nvgpu/common/linux/clk_arb.c
parent	3dabdf3e6d703ffdb4549ab54cf5bca34460706b (diff)
gpu: nvgpu: remove rcu locks in clk arbiter
RCUs are available only in the Linux kernel. Although they achieve lockless read access in some specific scenarios, they depend heavily on the kernel for their functionality; e.g. synchronize_rcu() relies on the kernel to delimit read-side critical sections. As such, it is very difficult to implement constructs analogous to RCUs in userspace code, and code that depends on RCUs for synchronization is not portable between OSes, especially when one of them runs in userspace, viz. QNX. Moreover, since this code is not on a performance-critical path, non-RCU constructs suffice.

For the clk arbiter code here, RCUs are therefore replaced by traditional spinlocks, so that the code can be shared with QNX down the line.

Jira VQRM-3741

Change-Id: I178e5958788c8fd998303a6a94d8f2f328201508
Signed-off-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1705535
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
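In essence, the change swaps each RCU read-side list traversal for a conventional spinlock-protected one, and each lock-free llist for a plain list_head guarded by a spinlock. Below is a minimal sketch of the pattern, using the nvgpu spinlock API visible in this patch; the item struct, the visit_items() helper, and the <nvgpu/lock.h> header path are illustrative assumptions, not the driver's actual code:

	#include <linux/list.h>
	#include <nvgpu/lock.h>	/* assumed location of struct nvgpu_spinlock */

	struct item {
		struct list_head link;
	};

	/* Both initialized elsewhere via INIT_LIST_HEAD()/nvgpu_spinlock_init(). */
	static struct list_head items;			/* was an RCU-managed list */
	static struct nvgpu_spinlock items_lock;	/* replaces RCU read-side protection */

	static void visit_items(void (*fn)(struct item *it))
	{
		struct item *it;

		/*
		 * Before: rcu_read_lock(); list_for_each_entry_rcu(...);
		 * rcu_read_unlock(); -- lockless for readers, but safe
		 * reclamation needs synchronize_rcu(), which only the
		 * kernel can provide.
		 */
		nvgpu_spinlock_acquire(&items_lock);
		list_for_each_entry(it, &items, link)
			fn(it);		/* readers now serialize with writers */
		nvgpu_spinlock_release(&items_lock);
	}

The cost is that readers now contend on the lock; the commit accepts this because the arbiter paths touched here are not performance-critical, and in exchange the same construct maps directly onto QNX userspace primitives.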
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/clk_arb.c')
-rw-r--r--	drivers/gpu/nvgpu/common/linux/clk_arb.c	67
1 file changed, 34 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c
index 39763f14..ec40a6ce 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c
@@ -16,9 +16,8 @@
 
 #include <linux/cdev.h>
 #include <linux/file.h>
+#include <linux/list.h>
 #include <linux/anon_inodes.h>
-#include <linux/rculist.h>
-#include <linux/llist.h>
 #include <linux/uaccess.h>
 
 #include <nvgpu/bitops.h>
@@ -740,7 +739,6 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	struct nvgpu_clk_dev *tmp;
 	struct nvgpu_clk_arb_target *target, *actual;
 	struct gk20a *g = arb->g;
-	struct llist_node *head;
 
 	u32 pstate = VF_POINT_INVALID_PSTATE;
 	u32 voltuv, voltuv_sram;
@@ -775,25 +773,21 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	gpc2clk_target = 0;
 	mclk_target = 0;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(session, &arb->sessions, link) {
+	nvgpu_spinlock_acquire(&arb->sessions_lock);
+	list_for_each_entry(session, &arb->sessions, link) {
 		if (!session->zombie) {
 			mclk_set = false;
 			gpc2clk_set = false;
-			target = NV_ACCESS_ONCE(session->target) ==
-				&session->target_pool[0] ?
+			target = (session->target == &session->target_pool[0] ?
 				&session->target_pool[1] :
-				&session->target_pool[0];
-			/* Do not reorder pointer */
-			nvgpu_smp_rmb();
-			head = llist_del_all(&session->targets);
-			if (head) {
-
+				&session->target_pool[0]);
+			nvgpu_spinlock_acquire(&session->session_lock);
+			if (!list_empty(&session->targets)) {
 				/* Copy over state */
 				target->mclk = session->target->mclk;
 				target->gpc2clk = session->target->gpc2clk;
 				/* Query the latest committed request */
-				llist_for_each_entry_safe(dev, tmp, head,
+				list_for_each_entry_safe(dev, tmp, &session->targets,
 						node) {
 					if (!mclk_set && dev->mclk_target_mhz) {
 						target->mclk =
@@ -807,12 +801,14 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 						gpc2clk_set = true;
 					}
 					nvgpu_ref_get(&dev->refcount);
-					llist_add(&dev->node, &arb->requests);
+					list_del(&dev->node);
+					nvgpu_spinlock_acquire(&arb->requests_lock);
+					list_add(&dev->node, &arb->requests);
+					nvgpu_spinlock_release(&arb->requests_lock);
 				}
-				/* Ensure target is updated before ptr sawp */
-				nvgpu_smp_wmb();
 				xchg(&session->target, target);
 			}
+			nvgpu_spinlock_release(&session->session_lock);
 
 			mclk_target = mclk_target > session->target->mclk ?
 				mclk_target : session->target->mclk;
@@ -822,7 +818,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 				gpc2clk_target : session->target->gpc2clk;
 		}
 	}
-	rcu_read_unlock();
+	nvgpu_spinlock_release(&arb->sessions_lock);
 
 	gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
 		arb->gpc2clk_default_mhz;
@@ -1010,22 +1006,24 @@ exit_arb:
 
 	current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask);
 	/* notify completion for all requests */
-	head = llist_del_all(&arb->requests);
-	llist_for_each_entry_safe(dev, tmp, head, node) {
+	nvgpu_spinlock_acquire(&arb->requests_lock);
+	list_for_each_entry_safe(dev, tmp, &arb->requests, node) {
 		nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM);
 		nvgpu_cond_signal_interruptible(&dev->readout_wq);
 		nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+		list_del(&dev->node);
 	}
+	nvgpu_spinlock_release(&arb->requests_lock);
 
 	nvgpu_atomic_set(&arb->notification_queue.head,
 		nvgpu_atomic_read(&arb->notification_queue.tail));
 	/* notify event for all users */
-	rcu_read_lock();
-	list_for_each_entry_rcu(dev, &arb->users, link) {
+	nvgpu_spinlock_acquire(&arb->users_lock);
+	list_for_each_entry(dev, &arb->users, link) {
 		alarms_notified |=
 			nvgpu_clk_arb_notify(dev, arb->actual, current_alarm);
 	}
-	rcu_read_unlock();
+	nvgpu_spinlock_release(&arb->users_lock);
 
 	/* clear alarms */
 	nvgpu_clk_arb_clear_global_alarm(g, alarms_notified &
@@ -1054,6 +1052,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 		goto mutex_fail;
 	nvgpu_spinlock_init(&arb->sessions_lock);
 	nvgpu_spinlock_init(&arb->users_lock);
+	nvgpu_spinlock_init(&arb->requests_lock);
 
 	arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
 	if (!arb->mclk_f_points) {
@@ -1119,9 +1118,9 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 	if (err < 0)
 		goto init_fail;
 
-	INIT_LIST_HEAD_RCU(&arb->users);
-	INIT_LIST_HEAD_RCU(&arb->sessions);
-	init_llist_head(&arb->requests);
+	INIT_LIST_HEAD(&arb->users);
+	INIT_LIST_HEAD(&arb->sessions);
+	INIT_LIST_HEAD(&arb->requests);
 
 	nvgpu_cond_init(&arb->request_wq);
 	arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
@@ -1245,10 +1244,11 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	nvgpu_smp_wmb();
 	session->target = &session->target_pool[0];
 
-	init_llist_head(&session->targets);
+	INIT_LIST_HEAD(&session->targets);
+	nvgpu_spinlock_init(&session->session_lock);
 
 	nvgpu_spinlock_acquire(&arb->sessions_lock);
-	list_add_tail_rcu(&session->link, &arb->sessions);
+	list_add_tail(&session->link, &arb->sessions);
 	nvgpu_spinlock_release(&arb->sessions_lock);
 
 	*_session = session;
@@ -1272,21 +1272,22 @@ void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
 	struct nvgpu_clk_arb *arb = session->g->clk_arb;
 	struct gk20a *g = session->g;
 	struct nvgpu_clk_dev *dev, *tmp;
-	struct llist_node *head;
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
 
 	if (arb) {
 		nvgpu_spinlock_acquire(&arb->sessions_lock);
-		list_del_rcu(&session->link);
+		list_del(&session->link);
 		nvgpu_spinlock_release(&arb->sessions_lock);
 	}
 
-	head = llist_del_all(&session->targets);
-	llist_for_each_entry_safe(dev, tmp, head, node) {
+	nvgpu_spinlock_acquire(&session->session_lock);
+	list_for_each_entry_safe(dev, tmp, &session->targets, node) {
 		nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+		list_del(&dev->node);
 	}
-	synchronize_rcu();
+	nvgpu_spinlock_release(&session->session_lock);
 
 	nvgpu_kfree(g, session);
 }
 