diff options
author | Sourab Gupta <sourabg@nvidia.com> | 2018-04-30 06:23:22 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-05-07 07:42:29 -0400 |
commit | 2b498cdf8aaa6c628cc1ac8e2b0b3a582c0decb3 (patch) | |
tree | 7646f75c290fab717d674ebc9b6d6b870146523f /drivers/gpu/nvgpu | |
parent | 3dabdf3e6d703ffdb4549ab54cf5bca34460706b (diff) |
gpu: nvgpu: remove rcu locks in clk arbiter
RCUs are available only in the (Linux) kernel. Though they are
able to achieve lockless access in some specific scenarios,
they are heavily dependent on the kernel for their functionality.
For example, synchronize_rcu() depends on the kernel in order to
delimit read-side critical sections.
As such, it is very difficult to implement constructs analogous
to RCUs in userspace code. As a result, code which depends on
RCUs for synchronization is not portable between OSes,
especially if one of them is in userspace, viz. QNX.
Also, if the code is not in a performance-critical path, we can
make do with non-RCU constructs.
For the clk arbiter code here, RCUs are replaced by traditional
spinlocks, so that the code can be used by QNX down the line.
Jira VQRM-3741
Change-Id: I178e5958788c8fd998303a6a94d8f2f328201508
Signed-off-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1705535
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/clk_arb.c | 67 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/clk_arb_linux.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c | 12 |
3 files changed, 45 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c index 39763f14..ec40a6ce 100644 --- a/drivers/gpu/nvgpu/common/linux/clk_arb.c +++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c | |||
@@ -16,9 +16,8 @@ | |||
16 | 16 | ||
17 | #include <linux/cdev.h> | 17 | #include <linux/cdev.h> |
18 | #include <linux/file.h> | 18 | #include <linux/file.h> |
19 | #include <linux/list.h> | ||
19 | #include <linux/anon_inodes.h> | 20 | #include <linux/anon_inodes.h> |
20 | #include <linux/rculist.h> | ||
21 | #include <linux/llist.h> | ||
22 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
23 | 22 | ||
24 | #include <nvgpu/bitops.h> | 23 | #include <nvgpu/bitops.h> |
@@ -740,7 +739,6 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
740 | struct nvgpu_clk_dev *tmp; | 739 | struct nvgpu_clk_dev *tmp; |
741 | struct nvgpu_clk_arb_target *target, *actual; | 740 | struct nvgpu_clk_arb_target *target, *actual; |
742 | struct gk20a *g = arb->g; | 741 | struct gk20a *g = arb->g; |
743 | struct llist_node *head; | ||
744 | 742 | ||
745 | u32 pstate = VF_POINT_INVALID_PSTATE; | 743 | u32 pstate = VF_POINT_INVALID_PSTATE; |
746 | u32 voltuv, voltuv_sram; | 744 | u32 voltuv, voltuv_sram; |
@@ -775,25 +773,21 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
775 | gpc2clk_target = 0; | 773 | gpc2clk_target = 0; |
776 | mclk_target = 0; | 774 | mclk_target = 0; |
777 | 775 | ||
778 | rcu_read_lock(); | 776 | nvgpu_spinlock_acquire(&arb->sessions_lock); |
779 | list_for_each_entry_rcu(session, &arb->sessions, link) { | 777 | list_for_each_entry(session, &arb->sessions, link) { |
780 | if (!session->zombie) { | 778 | if (!session->zombie) { |
781 | mclk_set = false; | 779 | mclk_set = false; |
782 | gpc2clk_set = false; | 780 | gpc2clk_set = false; |
783 | target = NV_ACCESS_ONCE(session->target) == | 781 | target = (session->target == &session->target_pool[0] ? |
784 | &session->target_pool[0] ? | ||
785 | &session->target_pool[1] : | 782 | &session->target_pool[1] : |
786 | &session->target_pool[0]; | 783 | &session->target_pool[0]); |
787 | /* Do not reorder pointer */ | 784 | nvgpu_spinlock_acquire(&session->session_lock); |
788 | nvgpu_smp_rmb(); | 785 | if (!list_empty(&session->targets)) { |
789 | head = llist_del_all(&session->targets); | ||
790 | if (head) { | ||
791 | |||
792 | /* Copy over state */ | 786 | /* Copy over state */ |
793 | target->mclk = session->target->mclk; | 787 | target->mclk = session->target->mclk; |
794 | target->gpc2clk = session->target->gpc2clk; | 788 | target->gpc2clk = session->target->gpc2clk; |
795 | /* Query the latest committed request */ | 789 | /* Query the latest committed request */ |
796 | llist_for_each_entry_safe(dev, tmp, head, | 790 | list_for_each_entry_safe(dev, tmp, &session->targets, |
797 | node) { | 791 | node) { |
798 | if (!mclk_set && dev->mclk_target_mhz) { | 792 | if (!mclk_set && dev->mclk_target_mhz) { |
799 | target->mclk = | 793 | target->mclk = |
@@ -807,12 +801,14 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
807 | gpc2clk_set = true; | 801 | gpc2clk_set = true; |
808 | } | 802 | } |
809 | nvgpu_ref_get(&dev->refcount); | 803 | nvgpu_ref_get(&dev->refcount); |
810 | llist_add(&dev->node, &arb->requests); | 804 | list_del(&dev->node); |
805 | nvgpu_spinlock_acquire(&arb->requests_lock); | ||
806 | list_add(&dev->node, &arb->requests); | ||
807 | nvgpu_spinlock_release(&arb->requests_lock); | ||
811 | } | 808 | } |
812 | /* Ensure target is updated before ptr sawp */ | ||
813 | nvgpu_smp_wmb(); | ||
814 | xchg(&session->target, target); | 809 | xchg(&session->target, target); |
815 | } | 810 | } |
811 | nvgpu_spinlock_release(&session->session_lock); | ||
816 | 812 | ||
817 | mclk_target = mclk_target > session->target->mclk ? | 813 | mclk_target = mclk_target > session->target->mclk ? |
818 | mclk_target : session->target->mclk; | 814 | mclk_target : session->target->mclk; |
@@ -822,7 +818,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | |||
822 | gpc2clk_target : session->target->gpc2clk; | 818 | gpc2clk_target : session->target->gpc2clk; |
823 | } | 819 | } |
824 | } | 820 | } |
825 | rcu_read_unlock(); | 821 | nvgpu_spinlock_release(&arb->sessions_lock); |
826 | 822 | ||
827 | gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : | 823 | gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : |
828 | arb->gpc2clk_default_mhz; | 824 | arb->gpc2clk_default_mhz; |
@@ -1010,22 +1006,24 @@ exit_arb: | |||
1010 | 1006 | ||
1011 | current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask); | 1007 | current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask); |
1012 | /* notify completion for all requests */ | 1008 | /* notify completion for all requests */ |
1013 | head = llist_del_all(&arb->requests); | 1009 | nvgpu_spinlock_acquire(&arb->requests_lock); |
1014 | llist_for_each_entry_safe(dev, tmp, head, node) { | 1010 | list_for_each_entry_safe(dev, tmp, &arb->requests, node) { |
1015 | nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM); | 1011 | nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM); |
1016 | nvgpu_cond_signal_interruptible(&dev->readout_wq); | 1012 | nvgpu_cond_signal_interruptible(&dev->readout_wq); |
1017 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | 1013 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); |
1014 | list_del(&dev->node); | ||
1018 | } | 1015 | } |
1016 | nvgpu_spinlock_release(&arb->requests_lock); | ||
1019 | 1017 | ||
1020 | nvgpu_atomic_set(&arb->notification_queue.head, | 1018 | nvgpu_atomic_set(&arb->notification_queue.head, |
1021 | nvgpu_atomic_read(&arb->notification_queue.tail)); | 1019 | nvgpu_atomic_read(&arb->notification_queue.tail)); |
1022 | /* notify event for all users */ | 1020 | /* notify event for all users */ |
1023 | rcu_read_lock(); | 1021 | nvgpu_spinlock_acquire(&arb->users_lock); |
1024 | list_for_each_entry_rcu(dev, &arb->users, link) { | 1022 | list_for_each_entry(dev, &arb->users, link) { |
1025 | alarms_notified |= | 1023 | alarms_notified |= |
1026 | nvgpu_clk_arb_notify(dev, arb->actual, current_alarm); | 1024 | nvgpu_clk_arb_notify(dev, arb->actual, current_alarm); |
1027 | } | 1025 | } |
1028 | rcu_read_unlock(); | 1026 | nvgpu_spinlock_release(&arb->users_lock); |
1029 | 1027 | ||
1030 | /* clear alarms */ | 1028 | /* clear alarms */ |
1031 | nvgpu_clk_arb_clear_global_alarm(g, alarms_notified & | 1029 | nvgpu_clk_arb_clear_global_alarm(g, alarms_notified & |
@@ -1054,6 +1052,7 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | |||
1054 | goto mutex_fail; | 1052 | goto mutex_fail; |
1055 | nvgpu_spinlock_init(&arb->sessions_lock); | 1053 | nvgpu_spinlock_init(&arb->sessions_lock); |
1056 | nvgpu_spinlock_init(&arb->users_lock); | 1054 | nvgpu_spinlock_init(&arb->users_lock); |
1055 | nvgpu_spinlock_init(&arb->requests_lock); | ||
1057 | 1056 | ||
1058 | arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); | 1057 | arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); |
1059 | if (!arb->mclk_f_points) { | 1058 | if (!arb->mclk_f_points) { |
@@ -1119,9 +1118,9 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | |||
1119 | if (err < 0) | 1118 | if (err < 0) |
1120 | goto init_fail; | 1119 | goto init_fail; |
1121 | 1120 | ||
1122 | INIT_LIST_HEAD_RCU(&arb->users); | 1121 | INIT_LIST_HEAD(&arb->users); |
1123 | INIT_LIST_HEAD_RCU(&arb->sessions); | 1122 | INIT_LIST_HEAD(&arb->sessions); |
1124 | init_llist_head(&arb->requests); | 1123 | INIT_LIST_HEAD(&arb->requests); |
1125 | 1124 | ||
1126 | nvgpu_cond_init(&arb->request_wq); | 1125 | nvgpu_cond_init(&arb->request_wq); |
1127 | arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, | 1126 | arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, |
@@ -1245,10 +1244,11 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, | |||
1245 | nvgpu_smp_wmb(); | 1244 | nvgpu_smp_wmb(); |
1246 | session->target = &session->target_pool[0]; | 1245 | session->target = &session->target_pool[0]; |
1247 | 1246 | ||
1248 | init_llist_head(&session->targets); | 1247 | INIT_LIST_HEAD(&session->targets); |
1248 | nvgpu_spinlock_init(&session->session_lock); | ||
1249 | 1249 | ||
1250 | nvgpu_spinlock_acquire(&arb->sessions_lock); | 1250 | nvgpu_spinlock_acquire(&arb->sessions_lock); |
1251 | list_add_tail_rcu(&session->link, &arb->sessions); | 1251 | list_add_tail(&session->link, &arb->sessions); |
1252 | nvgpu_spinlock_release(&arb->sessions_lock); | 1252 | nvgpu_spinlock_release(&arb->sessions_lock); |
1253 | 1253 | ||
1254 | *_session = session; | 1254 | *_session = session; |
@@ -1272,21 +1272,22 @@ void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount) | |||
1272 | struct nvgpu_clk_arb *arb = session->g->clk_arb; | 1272 | struct nvgpu_clk_arb *arb = session->g->clk_arb; |
1273 | struct gk20a *g = session->g; | 1273 | struct gk20a *g = session->g; |
1274 | struct nvgpu_clk_dev *dev, *tmp; | 1274 | struct nvgpu_clk_dev *dev, *tmp; |
1275 | struct llist_node *head; | ||
1276 | 1275 | ||
1277 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " "); | 1276 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " "); |
1278 | 1277 | ||
1279 | if (arb) { | 1278 | if (arb) { |
1280 | nvgpu_spinlock_acquire(&arb->sessions_lock); | 1279 | nvgpu_spinlock_acquire(&arb->sessions_lock); |
1281 | list_del_rcu(&session->link); | 1280 | list_del(&session->link); |
1282 | nvgpu_spinlock_release(&arb->sessions_lock); | 1281 | nvgpu_spinlock_release(&arb->sessions_lock); |
1283 | } | 1282 | } |
1284 | 1283 | ||
1285 | head = llist_del_all(&session->targets); | 1284 | nvgpu_spinlock_acquire(&session->session_lock); |
1286 | llist_for_each_entry_safe(dev, tmp, head, node) { | 1285 | list_for_each_entry_safe(dev, tmp, &session->targets, node) { |
1287 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | 1286 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); |
1287 | list_del(&dev->node); | ||
1288 | } | 1288 | } |
1289 | synchronize_rcu(); | 1289 | nvgpu_spinlock_release(&session->session_lock); |
1290 | |||
1290 | nvgpu_kfree(g, session); | 1291 | nvgpu_kfree(g, session); |
1291 | } | 1292 | } |
1292 | 1293 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h index b66876da..0942dd86 100644 --- a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h +++ b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h | |||
@@ -41,11 +41,12 @@ | |||
41 | struct nvgpu_clk_arb { | 41 | struct nvgpu_clk_arb { |
42 | struct nvgpu_spinlock sessions_lock; | 42 | struct nvgpu_spinlock sessions_lock; |
43 | struct nvgpu_spinlock users_lock; | 43 | struct nvgpu_spinlock users_lock; |
44 | struct nvgpu_spinlock requests_lock; | ||
44 | 45 | ||
45 | struct nvgpu_mutex pstate_lock; | 46 | struct nvgpu_mutex pstate_lock; |
46 | struct list_head users; | 47 | struct list_head users; |
47 | struct list_head sessions; | 48 | struct list_head sessions; |
48 | struct llist_head requests; | 49 | struct list_head requests; |
49 | 50 | ||
50 | struct gk20a *g; | 51 | struct gk20a *g; |
51 | int status; | 52 | int status; |
@@ -92,7 +93,7 @@ struct nvgpu_clk_dev { | |||
92 | struct nvgpu_clk_session *session; | 93 | struct nvgpu_clk_session *session; |
93 | union { | 94 | union { |
94 | struct list_head link; | 95 | struct list_head link; |
95 | struct llist_node node; | 96 | struct list_head node; |
96 | }; | 97 | }; |
97 | struct nvgpu_cond readout_wq; | 98 | struct nvgpu_cond readout_wq; |
98 | nvgpu_atomic_t poll_mask; | 99 | nvgpu_atomic_t poll_mask; |
@@ -110,8 +111,9 @@ struct nvgpu_clk_session { | |||
110 | struct gk20a *g; | 111 | struct gk20a *g; |
111 | struct nvgpu_ref refcount; | 112 | struct nvgpu_ref refcount; |
112 | struct list_head link; | 113 | struct list_head link; |
113 | struct llist_head targets; | 114 | struct list_head targets; |
114 | 115 | ||
116 | struct nvgpu_spinlock session_lock; | ||
115 | struct nvgpu_clk_arb_target target_pool[2]; | 117 | struct nvgpu_clk_arb_target target_pool[2]; |
116 | struct nvgpu_clk_arb_target *target; | 118 | struct nvgpu_clk_arb_target *target; |
117 | }; | 119 | }; |
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c index 2bf2e653..39f8948b 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c | |||
@@ -16,9 +16,8 @@ | |||
16 | 16 | ||
17 | #include <linux/cdev.h> | 17 | #include <linux/cdev.h> |
18 | #include <linux/file.h> | 18 | #include <linux/file.h> |
19 | #include <linux/list.h> | ||
19 | #include <linux/anon_inodes.h> | 20 | #include <linux/anon_inodes.h> |
20 | #include <linux/rculist.h> | ||
21 | #include <linux/llist.h> | ||
22 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
23 | #include <linux/poll.h> | 22 | #include <linux/poll.h> |
24 | #ifdef CONFIG_DEBUG_FS | 23 | #ifdef CONFIG_DEBUG_FS |
@@ -103,12 +102,11 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode, | |||
103 | 102 | ||
104 | if (arb) { | 103 | if (arb) { |
105 | nvgpu_spinlock_acquire(&arb->users_lock); | 104 | nvgpu_spinlock_acquire(&arb->users_lock); |
106 | list_del_rcu(&dev->link); | 105 | list_del(&dev->link); |
107 | nvgpu_spinlock_release(&arb->users_lock); | 106 | nvgpu_spinlock_release(&arb->users_lock); |
108 | nvgpu_clk_notification_queue_free(arb->g, &dev->queue); | 107 | nvgpu_clk_notification_queue_free(arb->g, &dev->queue); |
109 | } | 108 | } |
110 | 109 | ||
111 | synchronize_rcu(); | ||
112 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | 110 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); |
113 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | 111 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); |
114 | 112 | ||
@@ -372,7 +370,7 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | |||
372 | dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); | 370 | dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); |
373 | 371 | ||
374 | nvgpu_spinlock_acquire(&arb->users_lock); | 372 | nvgpu_spinlock_acquire(&arb->users_lock); |
375 | list_add_tail_rcu(&dev->link, &arb->users); | 373 | list_add_tail(&dev->link, &arb->users); |
376 | nvgpu_spinlock_release(&arb->users_lock); | 374 | nvgpu_spinlock_release(&arb->users_lock); |
377 | 375 | ||
378 | *event_fd = fd; | 376 | *event_fd = fd; |
@@ -423,7 +421,9 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, | |||
423 | goto fdput_fd; | 421 | goto fdput_fd; |
424 | } | 422 | } |
425 | nvgpu_ref_get(&dev->refcount); | 423 | nvgpu_ref_get(&dev->refcount); |
426 | llist_add(&dev->node, &session->targets); | 424 | nvgpu_spinlock_acquire(&session->session_lock); |
425 | list_add(&dev->node, &session->targets); | ||
426 | nvgpu_spinlock_release(&session->session_lock); | ||
427 | if (arb->update_work_queue) | 427 | if (arb->update_work_queue) |
428 | queue_work(arb->update_work_queue, &arb->update_fn_work); | 428 | queue_work(arb->update_work_queue, &arb->update_fn_work); |
429 | 429 | ||