-rw-r--r--   drivers/gpu/nvgpu/common/linux/clk_arb.c        255
-rw-r--r--   drivers/gpu/nvgpu/common/linux/clk_arb_linux.h    27
-rw-r--r--   drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c     3
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.c            2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.h                    4
5 files changed, 251 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c
index bb0fd628..2a6278e8 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c
@@ -397,17 +397,14 @@ exit_vf_table:
         if (status < 0)
                 nvgpu_clk_arb_set_global_alarm(g,
                                 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
-        if (arb->update_work_queue)
-                queue_work(arb->update_work_queue, &arb->update_fn_work);
+        nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 
         return status;
 }
 
 
-static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
+static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb)
 {
-        struct nvgpu_clk_arb *arb =
-                container_of(work, struct nvgpu_clk_arb, vf_table_fn_work);
         struct gk20a *g = arb->g;
         u32 err;
 
@@ -417,9 +414,7 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
                 nvgpu_err(g, "failed to cache VF table");
                 nvgpu_clk_arb_set_global_alarm(g,
                                 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
-                if (arb->update_work_queue)
-                        queue_work(arb->update_work_queue,
-                                &arb->update_fn_work);
+                nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 
                 return;
         }
@@ -725,10 +720,8 @@ static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
                         current_mask, new_mask)));
 }
 
-static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
+static void nvgpu_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
 {
-        struct nvgpu_clk_arb *arb =
-                container_of(work, struct nvgpu_clk_arb, update_fn_work);
         struct nvgpu_clk_session *session;
         struct nvgpu_clk_dev *dev;
         struct nvgpu_clk_dev *tmp;
@@ -1027,6 +1020,205 @@ exit_arb:
                         ~EVENT(ALARM_GPU_LOST));
 }
 
+/*
+ * Process one scheduled work item.
+ */
+static void nvgpu_clk_arb_worker_process_item(
+                struct nvgpu_clk_arb_work_item *work_item)
+{
+        nvgpu_log(work_item->arb->g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+        if (work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE)
+                nvgpu_clk_arb_run_vf_table_cb(work_item->arb);
+        else if (work_item->item_type == CLK_ARB_WORK_UPDATE_ARB)
+                nvgpu_clk_arb_run_arbiter_cb(work_item->arb);
+}
+
+/**
+ * Tell the worker that one more work needs to be done.
+ *
+ * Increase the work counter to synchronize the worker with the new work. Wake
+ * up the worker. If the worker was already running, it will handle this work
+ * before going to sleep.
+ */
+static int nvgpu_clk_arb_worker_wakeup(struct gk20a *g)
+{
+        int put;
+
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+        put = nvgpu_atomic_inc_return(&g->clk_arb_worker.put);
+        nvgpu_cond_signal_interruptible(&g->clk_arb_worker.wq);
+
+        return put;
+}
+
+/**
+ * Test if there is some work pending.
+ *
+ * This is a pair for nvgpu_clk_arb_worker_wakeup to be called from the
+ * worker. The worker has an internal work counter which is incremented once
+ * per finished work item. This is compared with the number of queued jobs.
+ */
+static bool nvgpu_clk_arb_worker_pending(struct gk20a *g, int get)
+{
+        bool pending = nvgpu_atomic_read(&g->clk_arb_worker.put) != get;
+
+        /* We don't need barriers because they are implicit in locking */
+        return pending;
+}
+
+/**
+ * Process the queued works for the worker thread serially.
+ *
+ * Flush all the work items in the queue one by one. This may block timeout
+ * handling for a short while, as these are serialized.
+ */
+static void nvgpu_clk_arb_worker_process(struct gk20a *g, int *get)
+{
+
+        while (nvgpu_clk_arb_worker_pending(g, *get)) {
+                struct nvgpu_clk_arb_work_item *work_item = NULL;
+
+                nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
+                if (!nvgpu_list_empty(&g->clk_arb_worker.items)) {
+                        work_item = nvgpu_list_first_entry(&g->clk_arb_worker.items,
+                                nvgpu_clk_arb_work_item, worker_item);
+                        nvgpu_list_del(&work_item->worker_item);
+                }
+                nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+
+                if (!work_item) {
+                        /*
+                         * Woke up for some other reason, but there are no
+                         * other reasons than a work item added in the items list
+                         * currently, so warn and ack the message.
+                         */
+                        nvgpu_warn(g, "Spurious worker event!");
+                        ++*get;
+                        break;
+                }
+
+                nvgpu_clk_arb_worker_process_item(work_item);
+                ++*get;
+        }
+}
+
+/*
+ * Process all work items found in the clk arbiter work queue.
+ */
+static int nvgpu_clk_arb_poll_worker(void *arg)
+{
+        struct gk20a *g = (struct gk20a *)arg;
+        struct gk20a_worker *worker = &g->clk_arb_worker;
+        int get = 0;
+
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+        while (!nvgpu_thread_should_stop(&worker->poll_task)) {
+                int ret;
+
+                ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
+                                &worker->wq,
+                                nvgpu_clk_arb_worker_pending(g, get), 0);
+
+                if (ret == 0)
+                        nvgpu_clk_arb_worker_process(g, &get);
+        }
+        return 0;
+}
+
+static int __nvgpu_clk_arb_worker_start(struct gk20a *g)
+{
+        char thread_name[64];
+        int err = 0;
+
+        if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task))
+                return err;
+
+        nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
+
+        /*
+         * Mutexes have implicit barriers, so there is no risk of a thread
+         * having a stale copy of the poll_task variable as the call to
+         * thread_is_running is volatile
+         */
+
+        if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task)) {
+                nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
+                return err;
+        }
+
+        snprintf(thread_name, sizeof(thread_name),
+                        "nvgpu_clk_arb_poll_%s", g->name);
+
+        err = nvgpu_thread_create(&g->clk_arb_worker.poll_task, g,
+                        nvgpu_clk_arb_poll_worker, thread_name);
+
+        nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
+        return err;
+}
+
+/**
+ * Append a work item to the worker's list.
+ *
+ * This adds work item to the end of the list and wakes the worker
+ * up immediately. If the work item already existed in the list, it's not added,
+ * because in that case it has been scheduled already but has not yet been
+ * processed.
+ */
+void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
+                struct nvgpu_clk_arb_work_item *work_item)
+{
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+        /*
+         * Warn if worker thread cannot run
+         */
+        if (WARN_ON(__nvgpu_clk_arb_worker_start(g))) {
+                nvgpu_warn(g, "clk arb worker cannot run!");
+                return;
+        }
+
+        nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
+        if (!nvgpu_list_empty(&work_item->worker_item)) {
+                /*
+                 * Already queued, so will get processed eventually.
+                 * The worker is probably awake already.
+                 */
+                nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+                return;
+        }
+        nvgpu_list_add_tail(&work_item->worker_item, &g->clk_arb_worker.items);
+        nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+
+        nvgpu_clk_arb_worker_wakeup(g);
+}
+
+/**
+ * Initialize the clk arb worker's metadata and start the background thread.
+ */
+int nvgpu_clk_arb_worker_init(struct gk20a *g)
+{
+        int err;
+
+        nvgpu_atomic_set(&g->clk_arb_worker.put, 0);
+        nvgpu_cond_init(&g->clk_arb_worker.wq);
+        nvgpu_init_list_node(&g->clk_arb_worker.items);
+        nvgpu_spinlock_init(&g->clk_arb_worker.items_lock);
+        err = nvgpu_mutex_init(&g->clk_arb_worker.start_lock);
+        if (err)
+                goto error_check;
+
+        err = __nvgpu_clk_arb_worker_start(g);
+error_check:
+        if (err) {
+                nvgpu_err(g, "failed to start clk arb poller thread");
+                return err;
+        }
+        return 0;
+}
+
 int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 {
         struct nvgpu_clk_arb *arb;
@@ -1120,15 +1312,17 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
         nvgpu_init_list_node(&arb->requests);
 
         nvgpu_cond_init(&arb->request_wq);
-        arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
-                        "vf_table_update");
-        arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
-                        "arbiter_update");
-
 
-        INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
+        nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item);
+        nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
+        arb->update_vf_table_work_item.arb = arb;
+        arb->update_arb_work_item.arb = arb;
+        arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE;
+        arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
 
-        INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
+        err = nvgpu_clk_arb_worker_init(g);
+        if (err < 0)
+                goto init_fail;
 
 #ifdef CONFIG_DEBUG_FS
         arb->debug = &arb->debug_pool[0];
@@ -1183,8 +1377,14 @@ void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
         struct nvgpu_clk_arb *arb = g->clk_arb;
 
         nvgpu_clk_arb_set_global_alarm(g, alarm);
-        if (arb->update_work_queue)
-                queue_work(arb->update_work_queue, &arb->update_fn_work);
+        nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
+}
+
+void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
+{
+        nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
+        nvgpu_thread_stop(&g->clk_arb_worker.poll_task);
+        nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
 }
 
 void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
@@ -1193,13 +1393,7 @@ void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
         int index;
 
         if (arb) {
-                cancel_work_sync(&arb->vf_table_fn_work);
-                destroy_workqueue(arb->vf_table_work_queue);
-                arb->vf_table_work_queue = NULL;
-
-                cancel_work_sync(&arb->update_fn_work);
-                destroy_workqueue(arb->update_work_queue);
-                arb->update_work_queue = NULL;
+                nvgpu_clk_arb_worker_deinit(g);
 
                 nvgpu_kfree(g, arb->gpc2clk_f_points);
                 nvgpu_kfree(g, arb->mclk_f_points);
@@ -1298,16 +1492,15 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
 
         session->zombie = true;
         nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
-        if (arb && arb->update_work_queue)
-                queue_work(arb->update_work_queue, &arb->update_fn_work);
+        if (arb)
+                nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 }
 
 void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
 {
         struct nvgpu_clk_arb *arb = g->clk_arb;
 
-        if (arb->vf_table_work_queue)
-                queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
+        nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item);
 }
 
 /* This function is inherently unsafe to call while arbiter is running
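
The comments on nvgpu_clk_arb_worker_wakeup() and nvgpu_clk_arb_worker_pending() above describe the put/get handshake this patch borrows from the existing channel worker: each producer bumps an atomic "put" counter and signals a condition variable, while the single poller thread keeps a private "get" counter, treats put != get as "work pending", and increments get once per item it drains. The following user-space sketch illustrates that handshake with C11 atomics and pthreads instead of nvgpu's primitives; the names toy_worker, toy_enqueue and toy_poll are made up for illustration and are not part of nvgpu.

/* toy_worker.c - illustrative sketch of the put/get wakeup pattern, not nvgpu code */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_worker {
        atomic_int put;             /* bumped once per enqueued item */
        pthread_mutex_t lock;
        pthread_cond_t wq;
        bool stop;
};

/* Producer: account for one more item and wake the poller (cf. worker_wakeup). */
static void toy_enqueue(struct toy_worker *w)
{
        pthread_mutex_lock(&w->lock);
        atomic_fetch_add(&w->put, 1);
        pthread_cond_signal(&w->wq);
        pthread_mutex_unlock(&w->lock);
}

/* Poller: sleep until put != get, then ack one item per increment (cf. worker_process). */
static void *toy_poll(void *arg)
{
        struct toy_worker *w = arg;
        int get = 0;

        pthread_mutex_lock(&w->lock);
        while (!w->stop) {
                while (atomic_load(&w->put) == get && !w->stop)
                        pthread_cond_wait(&w->wq, &w->lock);
                while (atomic_load(&w->put) != get) {
                        printf("processing item %d\n", get);
                        get++;
                }
        }
        pthread_mutex_unlock(&w->lock);
        return NULL;
}

int main(void)
{
        struct toy_worker w = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .wq = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;
        int i;

        pthread_create(&t, NULL, toy_poll, &w);
        for (i = 0; i < 3; i++)
                toy_enqueue(&w);

        pthread_mutex_lock(&w.lock);
        w.stop = true;
        pthread_cond_signal(&w.wq);
        pthread_mutex_unlock(&w.lock);
        pthread_join(t, NULL);
        return 0;
}

As in the driver, the counter only records how many wakeups are owed; the actual payload still travels through the locked item list, which is why the real worker can detect a spurious wakeup with no queued item and acknowledge it separately.
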
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
index e5ada25d..464590d5 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
@@ -39,6 +39,18 @@
  * The defines here should finally move to clk_arb.h, once these are
  * refactored to be free of Linux fields.
  */
+
+enum clk_arb_work_item_type {
+        CLK_ARB_WORK_UPDATE_VF_TABLE,
+        CLK_ARB_WORK_UPDATE_ARB
+};
+
+struct nvgpu_clk_arb_work_item {
+        enum clk_arb_work_item_type item_type;
+        struct nvgpu_clk_arb *arb;
+        struct nvgpu_list_node worker_item;
+};
+
 struct nvgpu_clk_arb {
         struct nvgpu_spinlock sessions_lock;
         struct nvgpu_spinlock users_lock;
@@ -62,10 +74,8 @@ struct nvgpu_clk_arb {
         u16 gpc2clk_min, gpc2clk_max;
         u16 mclk_min, mclk_max;
 
-        struct work_struct update_fn_work;
-        struct workqueue_struct *update_work_queue;
-        struct work_struct vf_table_fn_work;
-        struct workqueue_struct *vf_table_work_queue;
+        struct nvgpu_clk_arb_work_item update_vf_table_work_item;
+        struct nvgpu_clk_arb_work_item update_arb_work_item;
 
         struct nvgpu_cond request_wq;
 
@@ -140,5 +150,14 @@ nvgpu_clk_dev_from_link(struct nvgpu_list_node *node)
                 ((uintptr_t)node - offsetof(struct nvgpu_clk_dev, link));
 };
 
+static inline struct nvgpu_clk_arb_work_item *
+nvgpu_clk_arb_work_item_from_worker_item(struct nvgpu_list_node *node)
+{
+        return (struct nvgpu_clk_arb_work_item *)
+                ((uintptr_t)node - offsetof(struct nvgpu_clk_arb_work_item, worker_item));
+};
+
+void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
+                struct nvgpu_clk_arb_work_item *work_item);
 #endif /* __NVGPU_CLK_ARB_LINUX_H__ */
 
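
The new nvgpu_clk_arb_work_item_from_worker_item() accessor above is the usual intrusive-list idiom: the nvgpu_list_node is embedded in the work item, and the owning structure is recovered by subtracting the member offset from the node pointer, the same container_of idea the removed work_struct callbacks relied on. A small standalone illustration of that idiom follows; the names work_item and list_node are made up for the example, not nvgpu's.

/* intrusive_list.c - illustrative sketch only */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct list_node {
        struct list_node *prev, *next;
};

struct work_item {
        int type;
        struct list_node node;    /* embedded ("intrusive") link */
};

/* Recover the containing work_item from a pointer to its embedded node. */
static inline struct work_item *work_item_from_node(struct list_node *n)
{
        return (struct work_item *)((uintptr_t)n - offsetof(struct work_item, node));
}

int main(void)
{
        struct work_item item = { .type = 42 };
        struct list_node *n = &item.node;

        printf("type = %d\n", work_item_from_node(n)->type);   /* prints 42 */
        return 0;
}

Keeping the link inside the item is also what lets nvgpu_clk_arb_worker_enqueue() test nvgpu_list_empty(&work_item->worker_item) to detect an already-queued item without any extra allocation.
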
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
index 6d09b4b8..039f65f8 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
@@ -424,8 +424,7 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
         nvgpu_spinlock_acquire(&session->session_lock);
         nvgpu_list_add(&dev->node, &session->targets);
         nvgpu_spinlock_release(&session->session_lock);
-        if (arb->update_work_queue)
-                queue_work(arb->update_work_queue, &arb->update_fn_work);
+        nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 
 fdput_fd:
         fdput(fd);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a7a08b5a..e65ed278 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1591,7 +1591,7 @@ static void gk20a_channel_worker_process(struct gk20a *g, int *get)
 static int gk20a_channel_poll_worker(void *arg)
 {
         struct gk20a *g = (struct gk20a *)arg;
-        struct gk20a_channel_worker *worker = &g->channel_worker;
+        struct gk20a_worker *worker = &g->channel_worker;
         unsigned long watchdog_interval = 100; /* milliseconds */
         struct nvgpu_timeout timeout;
         int get = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 75357a82..03cfe285 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1406,14 +1406,14 @@ struct gk20a {
         u32 ltc_count;
         u32 ltc_streamid;
 
-        struct gk20a_channel_worker {
+        struct gk20a_worker {
                 struct nvgpu_thread poll_task;
                 nvgpu_atomic_t put;
                 struct nvgpu_cond wq;
                 struct nvgpu_list_node items;
                 struct nvgpu_spinlock items_lock;
                 struct nvgpu_mutex start_lock;
-        } channel_worker;
+        } channel_worker, clk_arb_worker;
 
         struct {
                 void (*open)(struct channel_gk20a *ch);