summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
diff options
context:
space:
mode:
author: Alex Waterman <alexw@nvidia.com> 2016-06-09 19:03:17 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2016-06-16 15:50:28 -0400
commit: 03164b0f4abeb06d4b5207531b97f11fe4a860dd (patch)
tree: 16d063e9c8999a660eef9e601fc9f6d6ae1bcaaf /drivers/gpu/nvgpu/gk20a/sync_gk20a.c
parent: 12661e4a48db6588a3f61a5729aff1eec7a9d17c (diff)
gpu: nvgpu: Allow nvgpu sync_pts to be introspected
Allow nvgpu to identify sync_pts that were made by nvgpu so that the underlying data structures can be accessed. This is important for the semaphore fast-path that allows nvgpu to skip doing a long CPU wait on a sync_fence.

Bug 1732449
JIRA DNVGPU-12

Change-Id: Iea43de21d2d7a4e75db6b6dbf24efb78ce64d619
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1162688
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/sync_gk20a.c115
1 files changed, 115 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index 3e14fa9c..d823ef9c 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -57,10 +57,112 @@ struct gk20a_sync_pt {
57 57
58struct gk20a_sync_pt_inst { 58struct gk20a_sync_pt_inst {
59 struct sync_pt pt; 59 struct sync_pt pt;
60
61 /*
62 * Magic number to identify a gk20a_sync_pt_inst from either a struct
63 * fence or a struct sync_pt.
64 */
65#define GK20A_SYNC_PT_INST_MAGIC 0xb333eeef;
66 u32 magic;
67
60 struct gk20a_sync_pt *shared; 68 struct gk20a_sync_pt *shared;
61}; 69};
62 70
63/** 71/**
72 * Check if a sync_pt is a gk20a_sync_pt_inst.
73 */
74int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt)
75{
76 struct gk20a_sync_pt_inst *pti =
77 container_of(pt, struct gk20a_sync_pt_inst, pt);
78
79 return pti->magic == GK20A_SYNC_PT_INST_MAGIC;
80}
81
82#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
83/**
84 * Check if a fence is a gk20a_sync_pt_inst.
85 */
86int gk20a_is_gk20a_sync_pt_inst(struct fence *f)
87{
88 struct sync_pt *pt = container_of(f, struct sync_pt, base);
89
90 return __gk20a_is_gk20a_sync_pt_inst(pt);
91}
92
93/**
94 * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the
95 * passed fence is in fact a gk20a_sync_pt_inst - use
96 * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function.
97 */
98struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f)
99{
100 struct sync_pt *pt = container_of(f, struct sync_pt, base);
101 struct gk20a_sync_pt_inst *pti =
102 container_of(pt, struct gk20a_sync_pt_inst, pt);
103
104 BUG_ON(!gk20a_is_gk20a_sync_pt_inst(f));
105
106 return pti->shared->sema;
107}
108#else
109/**
110 * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the
111 * passed sync_pt is in fact a gk20a_sync_pt_inst - use
112 * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function.
113 */
114struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt)
115{
116 struct gk20a_sync_pt_inst *pti;
117
118 BUG_ON(!__gk20a_is_gk20a_sync_pt_inst(pt));
119 pti = container_of(pt, struct gk20a_sync_pt_inst, pt);
120
121 return pti->shared->sema;
122}
123#endif
124
125/**
126 * Check if the passed sync_fence is backed by a single GPU semaphore. In such
127 * cases we can short circuit a lot of SW involved in signaling pre-fences and
128 * post fences.
129 */
130int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence)
131{
132#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
133 struct sync_pt *pt;
134 int i = 0;
135
136 if (list_empty(&fence->pt_list_head))
137 return 0;
138
139 /*
140 * For now reject multi-sync_pt fences. This could be changed in
141 * future. It would require that the sema fast path push a sema
142 * acquire for each semaphore in the fence.
143 */
144 list_for_each_entry(pt, &fence->pt_list_head, pt_list) {
145 i++;
146
147 if (i >= 2)
148 return 0;
149 }
150
151 pt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list);
152 return __gk20a_is_gk20a_sync_pt_inst(pt);
153
154#else
155 struct sync_fence_cb *cb0 = &fence->cbs[0];
156
157 if (fence->num_fences != 1)
158 return 0;
159
160 return gk20a_is_gk20a_sync_pt_inst(cb0->sync_pt);
161#endif
162}
163
164
165/**
64 * Compares sync pt values a and b, both of which will trigger either before 166 * Compares sync pt values a and b, both of which will trigger either before
65 * or after ref (i.e. a and b trigger before ref, or a and b trigger after 167 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
66 * ref). Supplying ref allows us to handle wrapping correctly. 168 * ref). Supplying ref allows us to handle wrapping correctly.
@@ -181,6 +283,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
181 if (!pti) 283 if (!pti)
182 return NULL; 284 return NULL;
183 285
286 pti->magic = GK20A_SYNC_PT_INST_MAGIC;
184 pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); 287 pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency);
185 if (!pti->shared) { 288 if (!pti->shared) {
186 sync_pt_free(&pti->pt); 289 sync_pt_free(&pti->pt);
@@ -327,11 +430,23 @@ static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
327 snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); 430 snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
328} 431}
329 432
/*
 * Write the value string for a semaphore backed sync point into str, using
 * at most size bytes. The sync point itself is not inspected: every
 * semaphore backed point is reported with the same fixed tag.
 */
static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
					     char *str, int size)
{
	snprintf(str, size, "gk20a-sema");
}
438
330static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, 439static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
331 int size) 440 int size)
332{ 441{
333 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); 442 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
334 ktime_t dur = gk20a_sync_pt_duration(sync_pt); 443 ktime_t dur = gk20a_sync_pt_duration(sync_pt);
444
445 if (pt->sema) {
446 gk20a_sync_pt_value_str_for_sema(pt, str, size);
447 return;
448 }
449
335 if (pt->dep) { 450 if (pt->dep) {
336 snprintf(str, size, "(dep: [%p] %s) %d", 451 snprintf(str, size, "(dep: [%p] %s) %d",
337 pt->dep, pt->dep->name, pt->thresh); 452 pt->dep, pt->dep->name, pt->thresh);