diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 154 |
1 files changed, 127 insertions, 27 deletions
@@ -99,42 +99,103 @@ struct entry_tsg { | |||
99 | uint32_t tsgid:12; | 99 | uint32_t tsgid:12; |
100 | uint64_t padding4:52; | 100 | uint64_t padding4:52; |
101 | } __attribute__((packed)); | 101 | } __attribute__((packed)); |
102 | #define MAX_TSGID (1 << 12) | ||
102 | 103 | ||
103 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | 104 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; |
104 | 105 | ||
105 | /* Preempt | 106 | /* Preempt a TSG or Channel by ID |
106 | ID/CHID : Id of TSG or channel to preempt | 107 | ID/CHID : Id of TSG or channel to preempt |
108 | IS_PENDING : ???? | ||
109 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG | ||
110 | |||
111 | Support: Kepler, Maxwell, Pascal, Volta | ||
107 | */ | 112 | */ |
108 | #define NV_PFIFO_PREEMPT 0x00002634 | 113 | #define NV_PFIFO_PREEMPT 0x00002634 |
109 | struct pfifo_preempt { | 114 | typedef union { |
110 | uint32_t id:12; | 115 | struct { |
111 | uint32_t padding:8; | 116 | uint32_t id:12; |
112 | bool is_pending:1; | 117 | uint32_t padding:8; |
113 | uint32_t padding2:3; | 118 | bool is_pending:1; |
114 | enum PREEMPT_TYPE type:2; | 119 | uint32_t padding2:3; |
115 | uint32_t padding3:6; | 120 | enum PREEMPT_TYPE type:2; |
116 | } __attribute__((packed)); | 121 | uint32_t padding3:6; |
122 | } __attribute__((packed)); | ||
123 | uint32_t raw; | ||
124 | } pfifo_preempt_t; | ||
125 | |||
126 | /* | ||
127 | "Initiate a preempt of the engine by writing the bit associated with its | ||
128 | runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT | ||
129 | for the preempt to complete." | ||
117 | 130 | ||
131 | Useful for preempting multiple runlists at once. | ||
132 | |||
133 | Appears to trigger an interrupt or some other side-effect on the Jetson | ||
134 | Xavier, as the built-in nvgpu driver seems to be disturbed by writing to this. | ||
135 | |||
136 | To select the runlist dynamically, use the BIT(nr) kernel macro. | ||
137 | Example: | ||
138 | runlist_preempt_t rl_preempt; | ||
139 | rl_preempt.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_PREEMPT); | ||
140 | rl_preempt.raw |= BIT(nr); | ||
141 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | ||
142 | |||
143 | Support: Volta | ||
144 | */ | ||
118 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 | 145 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 |
119 | struct runlist_preempt { | 146 | typedef union { |
120 | bool runlist_0:1; | 147 | struct { |
121 | bool runlist_1:1; | 148 | bool runlist_0:1; |
122 | bool runlist_2:1; | 149 | bool runlist_1:1; |
123 | bool runlist_3:1; | 150 | bool runlist_2:1; |
124 | bool runlist_4:1; | 151 | bool runlist_3:1; |
125 | bool runlist_5:1; | 152 | bool runlist_4:1; |
126 | bool runlist_6:1; | 153 | bool runlist_5:1; |
127 | bool runlist_7:1; | 154 | bool runlist_6:1; |
128 | bool runlist_8:1; | 155 | bool runlist_7:1; |
129 | bool runlist_9:1; | 156 | bool runlist_8:1; |
130 | bool runlist_10:1; | 157 | bool runlist_9:1; |
131 | bool runlist_11:1; | 158 | bool runlist_10:1; |
132 | bool runlist_12:1; | 159 | bool runlist_11:1; |
133 | bool runlist_13:1; | 160 | bool runlist_12:1; |
134 | uint32_t padding:28; | 161 | bool runlist_13:1; |
135 | } __attribute__((packed)); | 162 | uint32_t padding:18; |
163 | } __attribute__((packed)); | ||
164 | uint32_t raw; | ||
165 | } runlist_preempt_t; | ||
166 | |||
167 | /* Additional information on preempting from NVIDIA's driver (commit b1d0d8ece) | ||
168 | * "From h/w team | ||
169 | * Engine save can be blocked by eng stalling interrupts. | ||
170 | * FIFO interrupts shouldn't block an engine save from | ||
171 | * finishing, but could block FIFO from reporting preempt done. | ||
172 | * No immediate reason to reset the engine if FIFO interrupt is | ||
173 | * pending. | ||
174 | * The hub, priv_ring, and ltc interrupts could block context | ||
175 | * switch (or memory), but doesn't necessarily have to. | ||
176 | * For Hub interrupts they just report access counters and page | ||
177 | * faults. Neither of these necessarily block context switch | ||
178 | * or preemption, but they could. | ||
179 | * For example a page fault for graphics would prevent graphics | ||
180 | * from saving out. An access counter interrupt is a | ||
181 | * notification and has no effect. | ||
182 | * SW should handle page faults though for preempt to complete. | ||
183 | * PRI interrupt (due to a failed PRI transaction) will result | ||
184 | * in ctxsw failure reported to HOST. | ||
185 | * LTC interrupts are generally ECC related and if so, | ||
186 | * certainly don't block preemption/ctxsw but they could. | ||
187 | * Bus interrupts shouldn't have anything to do with preemption | ||
188 | * state as they are part of the Host EXT pipe, though they may | ||
189 | * exhibit a symptom that indicates that GPU is in a bad state. | ||
190 | * To be completely fair, when an engine is preempting SW | ||
191 | * really should just handle other interrupts as they come in. | ||
192 | * It's generally bad to just poll and wait on a preempt | ||
193 | * to complete since there are many things in the GPU which may | ||
194 | * cause a system to hang/stop responding." | ||
195 | */ | ||
136 | 196 | ||
137 | // Note: This is different with Turing | 197 | // Note: This is different with Turing |
198 | // Support: Kepler, Maxwell, Pascal, Volta | ||
138 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 | 199 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 |
139 | typedef union { | 200 | typedef union { |
140 | struct { | 201 | struct { |
@@ -145,6 +206,7 @@ typedef union { | |||
145 | uint32_t raw; | 206 | uint32_t raw; |
146 | } runlist_base_t; | 207 | } runlist_base_t; |
147 | 208 | ||
209 | // Support: Kepler, Maxwell, Pascal, Volta | ||
148 | #define NV_PFIFO_RUNLIST 0x00002274 | 210 | #define NV_PFIFO_RUNLIST 0x00002274 |
149 | typedef union { | 211 | typedef union { |
150 | struct { | 212 | struct { |
@@ -175,8 +237,8 @@ enum CHANNEL_STATUS { | |||
175 | }; | 237 | }; |
176 | 238 | ||
177 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) | 239 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) |
178 | #define MAX_CHID 512 // TODO: Double-check this is right | ||
179 | // There are a total of 512 possible channels | 240 | // There are a total of 512 possible channels |
241 | #define MAX_CHID 512 | ||
180 | typedef union { | 242 | typedef union { |
181 | struct { | 243 | struct { |
182 | // 0:31 | 244 | // 0:31 |
@@ -202,6 +264,43 @@ typedef union { | |||
202 | uint64_t raw; | 264 | uint64_t raw; |
203 | } channel_ctrl_t; | 265 | } channel_ctrl_t; |
204 | 266 | ||
267 | /* Control word for runlist enable/disable. | ||
268 | |||
269 | RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled) | ||
270 | |||
271 | To select the runlist dynamically, use the BIT(nr) kernel macro. | ||
272 | Disabling example: | ||
273 | runlist_disable_t rl_disable; | ||
274 | rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE); | ||
275 | rl_disable.raw |= BIT(nr); | ||
276 | nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); | ||
277 | Enabling example: | ||
278 | runlist_disable_t rl_disable; | ||
279 | rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE); | ||
280 | rl_disable.raw &= ~BIT(nr); | ||
281 | nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); | ||
282 | |||
283 | Support: Fermi, Kepler, Maxwell, Pascal, Volta, Turing | ||
284 | */ | ||
285 | #define NV_PFIFO_SCHED_DISABLE 0x00002630 | ||
286 | typedef union { | ||
287 | struct { | ||
288 | bool runlist_0:1; | ||
289 | bool runlist_1:1; | ||
290 | bool runlist_2:1; | ||
291 | bool runlist_3:1; | ||
292 | bool runlist_4:1; | ||
293 | bool runlist_5:1; | ||
294 | bool runlist_6:1; | ||
295 | bool runlist_7:1; | ||
296 | bool runlist_8:1; | ||
297 | bool runlist_9:1; | ||
298 | bool runlist_10:1; | ||
299 | uint32_t padding:21; | ||
300 | } __attribute__((packed)); | ||
301 | uint32_t raw; | ||
302 | } runlist_disable_t; | ||
303 | |||
205 | // TODO(jbakita): Maybe put the above GPU types in a different file. | 304 | // TODO(jbakita): Maybe put the above GPU types in a different file. |
206 | 305 | ||
207 | #define for_chan_in_tsg(chan, tsg) \ | 306 | #define for_chan_in_tsg(chan, tsg) \ |
@@ -220,6 +319,7 @@ struct runlist_iter { | |||
220 | // Defined in runlist.c | 319 | // Defined in runlist.c |
221 | struct gk20a* get_live_gk20a(void); | 320 | struct gk20a* get_live_gk20a(void); |
222 | int get_runlist_iter(struct runlist_iter *rl_iter); | 321 | int get_runlist_iter(struct runlist_iter *rl_iter); |
322 | int preempt_tsg(uint32_t tsg_id); | ||
223 | 323 | ||
224 | static inline struct gk20a *get_gk20a(struct device *dev) { | 324 | static inline struct gk20a *get_gk20a(struct device *dev) { |
225 | // XXX: Only works because gk20a* is the first member of gk20a_platform | 325 | // XXX: Only works because gk20a* is the first member of gk20a_platform |