diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 115 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.h | 12 |
2 files changed, 127 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index 3e14fa9c..d823ef9c 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |||
@@ -57,10 +57,112 @@ struct gk20a_sync_pt { | |||
57 | 57 | ||
58 | struct gk20a_sync_pt_inst { | 58 | struct gk20a_sync_pt_inst { |
59 | struct sync_pt pt; | 59 | struct sync_pt pt; |
60 | |||
61 | /* | ||
62 | * Magic number to identify a gk20a_sync_pt_inst from either a struct | ||
63 | * fence or a struct sync_pt. | ||
64 | */ | ||
65 | #define GK20A_SYNC_PT_INST_MAGIC 0xb333eeef; | ||
66 | u32 magic; | ||
67 | |||
60 | struct gk20a_sync_pt *shared; | 68 | struct gk20a_sync_pt *shared; |
61 | }; | 69 | }; |
62 | 70 | ||
63 | /** | 71 | /** |
72 | * Check if a sync_pt is a gk20a_sync_pt_inst. | ||
73 | */ | ||
74 | int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt) | ||
75 | { | ||
76 | struct gk20a_sync_pt_inst *pti = | ||
77 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
78 | |||
79 | return pti->magic == GK20A_SYNC_PT_INST_MAGIC; | ||
80 | } | ||
81 | |||
82 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) | ||
83 | /** | ||
84 | * Check if a fence is a gk20a_sync_pt_inst. | ||
85 | */ | ||
86 | int gk20a_is_gk20a_sync_pt_inst(struct fence *f) | ||
87 | { | ||
88 | struct sync_pt *pt = container_of(f, struct sync_pt, base); | ||
89 | |||
90 | return __gk20a_is_gk20a_sync_pt_inst(pt); | ||
91 | } | ||
92 | |||
93 | /** | ||
94 | * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the | ||
95 | * passed fence is in fact a gk20a_sync_pt_inst - use | ||
96 | * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function. | ||
97 | */ | ||
98 | struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f) | ||
99 | { | ||
100 | struct sync_pt *pt = container_of(f, struct sync_pt, base); | ||
101 | struct gk20a_sync_pt_inst *pti = | ||
102 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
103 | |||
104 | BUG_ON(!gk20a_is_gk20a_sync_pt_inst(f)); | ||
105 | |||
106 | return pti->shared->sema; | ||
107 | } | ||
108 | #else | ||
109 | /** | ||
110 | * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the | ||
111 | * passed sync_pt is in fact a gk20a_sync_pt_inst - use | ||
112 | * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function. | ||
113 | */ | ||
114 | struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt) | ||
115 | { | ||
116 | struct gk20a_sync_pt_inst *pti; | ||
117 | |||
118 | BUG_ON(!__gk20a_is_gk20a_sync_pt_inst(pt)); | ||
119 | pti = container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
120 | |||
121 | return pti->shared->sema; | ||
122 | } | ||
123 | #endif | ||
124 | |||
125 | /** | ||
126 | * Check if the passed sync_fence is backed by a single GPU semaphore. In such | ||
127 | * cases we can short circuit a lot of SW involved in signaling pre-fences and | ||
128 | * post fences. | ||
129 | */ | ||
130 | int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence) | ||
131 | { | ||
132 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
133 | struct sync_pt *pt; | ||
134 | int i = 0; | ||
135 | |||
136 | if (list_empty(&fence->pt_list_head)) | ||
137 | return 0; | ||
138 | |||
139 | /* | ||
140 | * For now reject multi-sync_pt fences. This could be changed in | ||
141 | * future. It would require that the sema fast path push a sema | ||
142 | * acquire for each semaphore in the fence. | ||
143 | */ | ||
144 | list_for_each_entry(pt, &fence->pt_list_head, pt_list) { | ||
145 | i++; | ||
146 | |||
147 | if (i >= 2) | ||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | pt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list); | ||
152 | return __gk20a_is_gk20a_sync_pt_inst(pt); | ||
153 | |||
154 | #else | ||
155 | struct sync_fence_cb *cb0 = &fence->cbs[0]; | ||
156 | |||
157 | if (fence->num_fences != 1) | ||
158 | return 0; | ||
159 | |||
160 | return gk20a_is_gk20a_sync_pt_inst(cb0->sync_pt); | ||
161 | #endif | ||
162 | } | ||
163 | |||
164 | |||
165 | /** | ||
64 | * Compares sync pt values a and b, both of which will trigger either before | 166 | * Compares sync pt values a and b, both of which will trigger either before |
65 | * or after ref (i.e. a and b trigger before ref, or a and b trigger after | 167 | * or after ref (i.e. a and b trigger before ref, or a and b trigger after |
66 | * ref). Supplying ref allows us to handle wrapping correctly. | 168 | * ref). Supplying ref allows us to handle wrapping correctly. |
@@ -181,6 +283,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst( | |||
181 | if (!pti) | 283 | if (!pti) |
182 | return NULL; | 284 | return NULL; |
183 | 285 | ||
286 | pti->magic = GK20A_SYNC_PT_INST_MAGIC; | ||
184 | pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); | 287 | pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); |
185 | if (!pti->shared) { | 288 | if (!pti->shared) { |
186 | sync_pt_free(&pti->pt); | 289 | sync_pt_free(&pti->pt); |
@@ -327,11 +430,23 @@ static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, | |||
327 | snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); | 430 | snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); |
328 | } | 431 | } |
329 | 432 | ||
/*
 * Describe a semaphore backed sync point: writes the fixed label
 * "gk20a-sema" into @str, limited to @size bytes. @pt is not consulted.
 */
static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
					     char *str, int size)
{
	static const char label[] = "gk20a-sema";

	snprintf(str, size, "%s", label);
}
438 | |||
330 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | 439 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, |
331 | int size) | 440 | int size) |
332 | { | 441 | { |
333 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | 442 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); |
334 | ktime_t dur = gk20a_sync_pt_duration(sync_pt); | 443 | ktime_t dur = gk20a_sync_pt_duration(sync_pt); |
444 | |||
445 | if (pt->sema) { | ||
446 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | ||
447 | return; | ||
448 | } | ||
449 | |||
335 | if (pt->dep) { | 450 | if (pt->dep) { |
336 | snprintf(str, size, "(dep: [%p] %s) %d", | 451 | snprintf(str, size, "(dep: [%p] %s) %d", |
337 | pt->dep, pt->dep->name, pt->thresh); | 452 | pt->dep, pt->dep->name, pt->thresh); |
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h index e9c26221..f885febd 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h | |||
@@ -19,11 +19,23 @@ | |||
19 | #define _GK20A_SYNC_H_ | 19 | #define _GK20A_SYNC_H_ |
20 | 20 | ||
21 | #include <linux/types.h> | 21 | #include <linux/types.h> |
22 | #include <linux/version.h> | ||
22 | 23 | ||
23 | struct sync_timeline; | 24 | struct sync_timeline; |
24 | struct sync_fence; | 25 | struct sync_fence; |
25 | struct sync_pt; | 26 | struct sync_pt; |
26 | struct gk20a_semaphore; | 27 | struct gk20a_semaphore; |
28 | struct fence; | ||
29 | |||
30 | int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt); | ||
31 | int gk20a_is_gk20a_sync_pt_inst(struct fence *f); | ||
32 | int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence); | ||
33 | |||
34 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) | ||
35 | struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f); | ||
36 | #else | ||
37 | struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt); | ||
38 | #endif | ||
27 | 39 | ||
28 | #ifdef CONFIG_SYNC | 40 | #ifdef CONFIG_SYNC |
29 | struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); | 41 | struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); |