summaryrefslogtreecommitdiffstats
path: root/drivers/virt/tegra/vm_err.c
diff options
context:
space:
mode:
authorYashomati <ygodbole@nvidia.com>2019-05-31 21:59:52 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2019-12-24 14:56:43 -0500
commit87dc30edda5936afa82b0afa821c8be2e44343c5 (patch)
treee1f61e27e96e88880626426db82dbe21c85e6053 /drivers/virt/tegra/vm_err.c
parentcda3f78dc40d0f21b1108a4087b6198fb53bde02 (diff)
inject-vm-err: handlers for injected errors
If Linux/EBP causes an error that HV can't handle, then instead of freezing the guest, HV injects the error back into the guest. This enables the guest to handle the error as gracefully as it can/needs. This changeset provides 2 parts: 1. sample handlers: minimal placeholder handlers that just dump the error information on to the console. This is to be used as a reference for any customized elaborate error handling that may be needed. 2. library module: it comes into existence only if/when any error handler is registered. Its main responsibilities: - map memory that's shared with HV where HV dumps all information about the errors. - register handlers for interrupts used by HV to inject errors - invoke custom error handlers when HV injects error JIRA ESV-312 Bug 2580803 Change-Id: Ia8c6484d423fd33cabbfd901f0f6ebb0da95cb40 Signed-off-by: Yashomati <ygodbole@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2214402 Reviewed-on: https://git-master.nvidia.com/r/2128765 GVS: Gerrit_Virtual_Submit Reviewed-by: Dmitry Pervushin <dpervushin@nvidia.com> Reviewed-by: Hardik T Shah <hardikts@nvidia.com> Reviewed-by: Rohit Upadhyay <rupadhyay@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/virt/tegra/vm_err.c')
-rw-r--r--drivers/virt/tegra/vm_err.c535
1 files changed, 535 insertions, 0 deletions
diff --git a/drivers/virt/tegra/vm_err.c b/drivers/virt/tegra/vm_err.c
new file mode 100644
index 000000000..d9f11248c
--- /dev/null
+++ b/drivers/virt/tegra/vm_err.c
@@ -0,0 +1,535 @@
1/*
2 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#define pr_fmt(fmt) "vm-err: " fmt
14
15#include <linux/interrupt.h>
16#include <linux/of_irq.h>
17#include <linux/platform_device.h>
18#include <linux/vm_err.h>
19#include <asm/traps.h>
20#include <asm-generic/irq_regs.h>
21#include <asm/system_misc.h>
22#include <soc/tegra/virt/syscalls.h>
23#include <soc/tegra/chip-id.h>
24
/* Per-device state for HV-injected VM error handling */
struct tegra_hv_err_ctrl {
	struct device *dev;			/* bound platform device */
	struct errInfo *err_info;		/* mapped HV-shared error memory */
	unsigned int async_err_arr_items;	/* slots in the async error ring */
	int hv_peer_err_irq_id;			/* raw SPI id from HV; -1 = none */
	unsigned int vcpu_cnt;			/* number of VCPUs in this VM */
	struct serr_hook hook;			/* sync (SError) hook registration */
	struct vm_err_handlers handlers;	/* custom handlers, installed lazily */
};
34
/* HV configuration (own guest id, number of guests) cached at probe time */
static struct tegra_hv_config config;

/* Backing store for the runtime-synthesized "interrupts" DT property:
 * one cell each for irq type, SPI number and trigger flags.
 */
static unsigned int intr_info[3]; /* intr_property_size = 3 */

static struct property interrupts_prop = {
	.name = "interrupts",
};
42
/*
 * check_sync_err() - consume the synchronous error slot for one VCPU.
 * @vcpu_id:           VCPU the synchronous abort was delivered on.
 * @ctrl:              per-device control state (shared memory, handlers).
 * @send_sync_err_ack: out: whether the caller must ack the error to HV.
 *
 * Return: true when the guest should enter bad mode, false to continue.
 */
static bool check_sync_err(const unsigned int vcpu_id,
		const struct tegra_hv_err_ctrl *const ctrl,
		bool *send_sync_err_ack)
{
	uint64_t rd_idx;
	const struct errData *err_data;

	if (vcpu_id >= ctrl->vcpu_cnt) {
		dev_crit(ctrl->dev, "%s: Invalid vcpu id %u\n", __func__,
			 vcpu_id);
		*send_sync_err_ack = false;
		/* Unexpected vcpu id. Enter bad mode. */
		return true;
	}

	/* Shared memory layout is:
	 * |--async-err-metadata--|--async-errors-array-|--sync-errors-array-|
	 * Size of async errors array = Max errors + 1 (to avoid same empty
	 * and full conditions of the buffer)
	 * Size of sync errors array = 1 error per VCPU * number of VCPUs in VM
	 */
	rd_idx = ctrl->async_err_arr_items + vcpu_id;
	/* It's already validated at init time that sufficient memory is
	 * allocated to hold async_err_arr_items + sync error per vcpu. Hence,
	 * after validating the vcpu_id above, no need to validate rd_idx here.
	 */
	err_data = &(ctrl->err_info->errData[rd_idx]);
	if (!err_data->sync_dataAbort.isFilled) {
		*send_sync_err_ack = false;
		dev_info(ctrl->dev, "No synchronous error data on vcpu %u\n",
			 vcpu_id);
		/* No sync error. No need to enter bad mode. */
		return false;
	}

	if (err_data->errType != SYNC) {
		dev_crit(ctrl->dev, "%s: unexpected error Type %d\n",
			 __func__, err_data->errType);
		*send_sync_err_ack = true;
		/* Unexpected error id. Enter bad mode. */
		return true;
	}

	/* A sync abort can only be caused by this guest itself */
	if (err_data->offendingGuestId != config.guest_id_self) {
		dev_crit(ctrl->dev, "%s: invalid offender id %u\n", __func__,
			 err_data->offendingGuestId);
		*send_sync_err_ack = true;
		/* Invalid id of offending guest. Enter bad mode. */
		return true;
	}
	dev_err(ctrl->dev, "Synchronous error on vcpu %u\n", vcpu_id);

	if (ctrl->handlers.fn_self_sync) {
		*send_sync_err_ack = true;
		/* Enter bad_mode (or otherwise) as custom handler dictates */
		return ctrl->handlers.fn_self_sync(err_data);
	}

	/* should never reach here
	 * NOTE(review): this path IS reachable when a client registered only
	 * async handlers (the serr hook is installed regardless) — confirm
	 * whether bad mode is the intended outcome in that case.
	 */
	*send_sync_err_ack = true;
	/* Reaching here is unexpected. Enter bad mode. */
	return true;
}
106
/*
 * async_err_handler() - vIRQ handler for HV-injected asynchronous errors.
 * @irq:     Linux irq number (unused).
 * @context: the struct tegra_hv_err_ctrl passed to devm_request_irq().
 *
 * Drains the async error ring shared with HV, dispatches each record to
 * the self/peer custom handler, acks the consumed records to HV, and
 * enters bad mode if any handler (or a failed ack) demands it.
 */
static irqreturn_t async_err_handler(int irq, void *context)
{
	unsigned int num_async_errs_read = 0;
	bool enter_bad_mode = false;
	const struct tegra_hv_err_ctrl *const ctrl = context;
	const unsigned int vcpu_id = hyp_read_vcpu_id();
	uint64_t local_rd_idx, next_rd_idx;
	const struct errData *err_data;
	bool (*fn_self_async)(const struct errData *const err_data);
	bool (*fn_peer)(const struct errData *const err_data);
	bool (*handler)(const struct errData *const err_data);
	struct pt_regs *regs;

	if (vcpu_id != 0) {
		dev_err(ctrl->dev, "Asynchronous error on vcpu %u\n", vcpu_id);
		/* Only VCPU0 is expected to receive async error vIRQ */
		return IRQ_HANDLED;
	}

	fn_self_async = ctrl->handlers.fn_self_async;
	fn_peer = ctrl->handlers.fn_peer;

	/* Nothing to dispatch to; drop the notification */
	if ((fn_self_async == NULL) && (fn_peer == NULL)) {
		dev_err(ctrl->dev, "Asynchronous error handlers absent\n");
		return IRQ_HANDLED;
	}

	local_rd_idx = ctrl->err_info->async_metaData.rdIdx;
	dev_dbg(ctrl->dev, "Local Rd Idx = %llu, shared Wr Idx = %llu\n",
		local_rd_idx, ctrl->err_info->async_metaData.wrIdx);

	/* Check async error. Read until error queue gets empty.
	 * rdIdx names the last consumed slot, so the next unread record
	 * lives one slot past it — consistent with the "max errors + 1"
	 * ring sizing described in check_sync_err()/shared_mem_map().
	 */
	while (local_rd_idx != ctrl->err_info->async_metaData.wrIdx) {
		next_rd_idx = (local_rd_idx + 1) % ctrl->async_err_arr_items;

		err_data = &(ctrl->err_info->errData[next_rd_idx]);
		if (err_data->offendingGuestId == config.guest_id_self)
			handler = fn_self_async;
		else
			handler = fn_peer;

		/* Any single handler requesting bad mode is sticky */
		if (handler) {
			if (handler(err_data) == true)
				enter_bad_mode = true;
		}

		local_rd_idx = next_rd_idx;
		num_async_errs_read++;
		dev_dbg(ctrl->dev, "Local Rd Idx = %llu\n", local_rd_idx);
	}

	if (num_async_errs_read) {
		dev_err(ctrl->dev, "%u asynchronous error(s) read\n",
			num_async_errs_read);

		/* Send ack for async error(s) to HV */
		if (hyp_send_async_err_ack(local_rd_idx) != 0) {
			dev_crit(ctrl->dev,
				 "%s: Sending ack failed. Setting bad mode\n",
				 __func__);
			/* Unexpected */
			enter_bad_mode = true;
		}
	}

	/* die() may return (e.g. oops=panic not set); panic() is the
	 * guaranteed stop.
	 */
	if (enter_bad_mode) {
		regs = get_irq_regs();
		die("Oops - bad mode", regs, 0);
		panic("bad mode");
	}

	return IRQ_HANDLED;
}
180
/*
 * sync_err_handler() - serr_hook callback for synchronous aborts.
 * @regs:    fault-time registers (unused here).
 * @reason:  abort reason code (unused here).
 * @esr:     exception syndrome register value (unused here).
 * @context: the struct tegra_hv_err_ctrl installed via hook.priv.
 *
 * Return: 0 tells the caller to enter bad mode, non-zero to continue —
 * note the inverted sense relative to the local enter_bad_mode flag.
 */
static int sync_err_handler(struct pt_regs *regs, int reason,
		uint32_t esr, void *context)
{
	bool enter_bad_mode = false;
	bool send_sync_err_ack = false;
	const struct tegra_hv_err_ctrl *const ctrl = context;
	const unsigned int vcpu_id = hyp_read_vcpu_id();

	/* Check sync error */
	if (check_sync_err(vcpu_id, ctrl, &send_sync_err_ack) == true)
		enter_bad_mode = true;

	/* Send ack for error to HV.
	 * NOTE(review): the ack hypercall receives the bool flag itself as
	 * its argument — confirm the expected parameter against the HV API.
	 */
	if (send_sync_err_ack) {
		if (hyp_send_sync_err_ack(send_sync_err_ack) != 0) {
			dev_crit(ctrl->dev,
				 "%s: Sending ack failed. Setting bad mode\n",
				 __func__);
			/* Unexpected */
			enter_bad_mode = true;
		}
	}

	/* Caller expects 0 to enter bad mode */
	return (!enter_bad_mode);
}
207
208void tegra_hv_get_config(struct tegra_hv_config *cfg)
209{
210 cfg->guest_id_self = config.guest_id_self;
211 cfg->num_guests = config.num_guests;
212}
213EXPORT_SYMBOL(tegra_hv_get_config);
214
215static int virq_handler_init(const struct platform_device *pdev)
216{
217 int ret;
218 struct irq_data *peer_err_irq_data;
219 int lin_peer_err_irq_id;
220 struct tegra_hv_err_ctrl *ctrl = platform_get_drvdata(pdev);
221 struct device dev = pdev->dev;
222
223 dev_info(ctrl->dev, "Error notification HV IRQ id: %d\n",
224 ctrl->hv_peer_err_irq_id);
225
226 /* Ensure HV returned valid irq */
227 if (ctrl->hv_peer_err_irq_id == -1)
228 return 0;
229
230 /* Set indicate irq type 0 to indicate Shared Peripheral Irq */
231 intr_info[0] = cpu_to_be32(0);
232 /* Id in SPI namespace - subtract number of PPIs
233 * (Private Peripheral Irqs) which is = 32
234 */
235 intr_info[1] = cpu_to_be32(ctrl->hv_peer_err_irq_id - 32);
236 /* Trigger irq on low-to-high edge (0x1) */
237 intr_info[2] = cpu_to_be32(IRQF_TRIGGER_RISING);
238
239 interrupts_prop.length = sizeof(intr_info);
240 dev_info(ctrl->dev, "interrupts_prop.length %u\n",
241 interrupts_prop.length);
242
243 interrupts_prop.value = intr_info;
244
245 if (of_add_property(dev.of_node, &interrupts_prop)) {
246 dev_err(ctrl->dev, "%s: failed to add interrupts property\n",
247 __func__);
248 return -EACCES;
249 }
250
251 lin_peer_err_irq_id = of_irq_get(dev.of_node, 0);
252 if (lin_peer_err_irq_id < 0) {
253 dev_err(ctrl->dev, "%s: Unable to get Linux irq for id %d\n",
254 __func__, ctrl->hv_peer_err_irq_id);
255 return lin_peer_err_irq_id;
256 }
257
258 peer_err_irq_data = irq_get_irq_data(lin_peer_err_irq_id);
259 if (peer_err_irq_data == NULL) {
260 dev_err(ctrl->dev, "%s: Failed to get data for Linux irq %d\n",
261 __func__, lin_peer_err_irq_id);
262 return -ENODEV;
263 }
264
265 ret = devm_request_irq(&dev, lin_peer_err_irq_id, async_err_handler,
266 IRQ_NOTHREAD, dev_name(&dev), ctrl);
267 if (ret < 0) {
268 dev_err(ctrl->dev,
269 "%s: failed to register IRQ %d, Err %d, %s\n",
270 __func__, lin_peer_err_irq_id, ret, pdev->name);
271 return ret;
272 }
273 dev_info(ctrl->dev, "Registered Linux IRQ %d for peer notification\n",
274 lin_peer_err_irq_id);
275
276 return 0;
277}
278
279static int serr_handler_init(struct platform_device *pdev)
280{
281 struct tegra_hv_err_ctrl *ctrl = platform_get_drvdata(pdev);
282
283 ctrl->hook.fn = sync_err_handler;
284 ctrl->hook.priv = platform_get_drvdata(pdev);
285 register_serr_hook(&ctrl->hook);
286
287 return 0;
288}
289
/*
 * shared_mem_map() - query and map the HV-shared error-info memory.
 * @pdev: bound platform device whose drvdata holds the ctrl.
 *
 * Reads the IPA/size of the shared buffer plus ring/VCPU geometry via
 * hypercall, validates that the buffer can hold the async ring and one
 * sync slot per VCPU, then maps it into the kernel.
 *
 * Return: 0 on success, -ENODEV when the hypercall is unimplemented,
 * -ENOMEM on bad geometry or mapping failure.
 */
static int shared_mem_map(struct platform_device *pdev)
{
	uint64_t ipa, buff_size, required_size;
	int ret;
	struct tegra_hv_err_ctrl *ctrl = platform_get_drvdata(pdev);

	/* Get error info details */
	ret = hyp_read_err_info_get(&ipa, &buff_size,
			&ctrl->async_err_arr_items, &ctrl->hv_peer_err_irq_id,
			&ctrl->vcpu_cnt);
	if (ret != 0) {
		/* It could come here if DTS and defconfig enable execution
		 * of this code, but HV hasn't implemented the hypercall.
		 * Flag error.
		 */
		dev_err(ctrl->dev,
			"%s: failed to get err memory address. Err %d\n",
			__func__, ret);
		return -ENODEV;
	}

	if ((ipa == 0) || (buff_size == 0) ||
			(ctrl->async_err_arr_items == 0)) {
		/* It could come here if DTS and defconfig enable execution
		 * of this code, but PCT hasn't enabled error injection.
		 * A warning should suffice.
		 */
		dev_warn(ctrl->dev, "%s: invalid shared memory parameters\n",
			 __func__);
		return -ENOMEM;
	}

	/* Shared memory layout is:
	 * |--async-err-metadata--|--async-errors-array-|--sync-errors-array-|
	 * Size of async errors array = Max errors + 1 (to avoid same empty and
	 * full conditions of the buffer)
	 * Size of sync errors array = 1 error per VCPU * number of VCPUs on
	 * a VM
	 */
	required_size = sizeof(struct async_metaData) +
		(sizeof(struct errData) *
		(ctrl->async_err_arr_items + ctrl->vcpu_cnt));
	if (buff_size < required_size) {
		dev_err(ctrl->dev,
			"%s:invalid params. size %llu. required size %llu\n",
			__func__, buff_size, required_size);
		dev_err(ctrl->dev, "%s: async arr size %u. vcpus %u\n",
			__func__, ctrl->async_err_arr_items, ctrl->vcpu_cnt);
		return -ENOMEM;
	}

	dev_info(ctrl->dev, "%s: Err info IPA for guest %u: 0x%llx\n",
		 __func__, config.guest_id_self, ipa);
	dev_info(ctrl->dev, "Err info buf size 0x%llX\n", buff_size);
	dev_info(ctrl->dev, "Async err arr size %u. Number of VCPUs %u\n",
		 ctrl->async_err_arr_items, ctrl->vcpu_cnt);

	/* Map shared memory.
	 * NOTE(review): mapped cached — assumes HV keeps this IPA range
	 * coherent with the guest's view; confirm against the HV contract.
	 */
	ctrl->err_info = (struct errInfo *) ioremap_cache(ipa, buff_size);
	if (ctrl->err_info == NULL)
		return -ENOMEM;

	return 0;
}
354
355static int hyp_config_init(struct device *dev)
356{
357 int ret = hyp_read_gid(&config.guest_id_self);
358
359 if (ret != 0) {
360 dev_err(dev, "%s: failed to read guest id. Err %d\n",
361 __func__, ret);
362 return ret;
363 }
364
365 ret = hyp_read_nguests(&config.num_guests);
366 if (ret != 0) {
367 /* Only privileged guest can query number of guests */
368 dev_warn(dev, "%s: can't read number of guests. Err %d\n",
369 __func__, ret);
370 }
371
372 dev_info(dev, "%s: guest id %u num guests %u\n", __func__,
373 config.guest_id_self, config.num_guests);
374
375 return 0;
376}
377
378static void shared_structs_check(struct device *dev)
379{
380 /* Ensure coherency with common header */
381 BUILD_BUG_ON(REASON_ENUM_SIZE != (ARRAY_SIZE(fault_reason_desc)));
382
383 /* Manually compare these sizes with HV console dump to ensure
384 * common structures shared by HV and Linux are in sync
385 */
386 dev_info(dev, "async_metaData size 0x%lx\n",
387 sizeof(struct async_metaData));
388 dev_info(dev, "async_bridgeErr size 0x%lx\n",
389 sizeof(struct async_bridgeErr));
390 dev_info(dev, "async_smmuErr size 0x%lx\n",
391 sizeof(struct async_smmuErr));
392 dev_info(dev, "async_mcErr size 0x%lx\n",
393 sizeof(struct async_mcErr));
394 dev_info(dev, "sync_dataAbort size 0x%lx\n",
395 sizeof(struct sync_dataAbort));
396 dev_info(dev, "errData size 0x%lx\n", sizeof(struct errData));
397}
398
399static int vm_err_handler_init(struct platform_device *pdev)
400{
401 int ret;
402 struct tegra_hv_err_ctrl *ctrl;
403 struct device *dev = &pdev->dev;
404
405 if (!is_tegra_hypervisor_mode()) {
406 dev_err(dev, "%s: hypervisor is not present\n", __func__);
407 return -ENODEV;
408 }
409
410 shared_structs_check(dev);
411
412 ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL);
413 if (!ctrl)
414 return -ENOMEM;
415
416 ctrl->dev = dev;
417 platform_set_drvdata(pdev, ctrl);
418
419 ret = hyp_config_init(dev);
420 if (ret)
421 return ret;
422
423 ret = shared_mem_map(pdev);
424 if (ret)
425 return -ENOMEM;
426
427 ret = serr_handler_init(pdev);
428 if (ret)
429 return ret;
430
431 ret = virq_handler_init(pdev);
432 if (ret)
433 return ret;
434
435 return 0;
436}
437
438static int vm_err_handler_remove(struct platform_device *pdev)
439{
440 struct tegra_hv_err_ctrl *ctrl = platform_get_drvdata(pdev);
441 struct device_node *node = pdev->dev.of_node;
442
443 if (of_remove_property(node,
444 of_find_property(node, "interrupts", NULL))) {
445 dev_err(ctrl->dev, "%s: failed to add interrupts property\n",
446 __func__);
447 return -EACCES;
448 }
449
450 unregister_serr_hook(&ctrl->hook);
451 iounmap(ctrl->err_info);
452
453 dev_info(ctrl->dev, "%s: cleaned up and unregistered handler\n",
454 __func__);
455
456 return 0;
457}
458
/* DT match table: a "nvidia,tegra-hv-err" node enables this driver */
static const struct of_device_id tegra_hv_err_match[] = {
	{ .compatible = "nvidia,tegra-hv-err", .data = NULL},
	{},
};
463
/* Registered lazily from tegra_hv_register_vm_err_hooks(), not at boot */
static struct platform_driver tegra_hv_err_pdriver = {
	.driver = {
		.name = "tegra-hv-err-handler",
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(tegra_hv_err_match),
	},
	.probe = vm_err_handler_init,
	.remove = vm_err_handler_remove,
};
473
474static int tegra_hv_register_hooks_for_device(struct device *dev,
475 void *handlers)
476{
477 struct tegra_hv_err_ctrl *ctrl;
478 const struct platform_device *pd = container_of(dev,
479 struct platform_device, dev);
480 const struct vm_err_handlers *_handlers =
481 (struct vm_err_handlers *) handlers;
482
483 ctrl = platform_get_drvdata(pd);
484 if (!ctrl) {
485 dev_err(dev, "%s: no platform data", __func__);
486 return 0;
487 }
488
489 if (ctrl->handlers.fn_self_async == NULL)
490 ctrl->handlers.fn_self_async = _handlers->fn_self_async;
491
492 if (ctrl->handlers.fn_self_sync == NULL)
493 ctrl->handlers.fn_self_sync = _handlers->fn_self_sync;
494
495 if (ctrl->handlers.fn_peer == NULL)
496 ctrl->handlers.fn_peer = _handlers->fn_peer;
497
498 return 0;
499}
500
/*
 * tegra_hv_register_vm_err_hooks() - public API to install VM error
 * handlers; lazily registers the platform driver on first use.
 * @handlers: set of self-sync/self-async/peer callbacks; passing a set
 *            with all three NULL unregisters the driver instead.
 *
 * Return: 0 on success, non-zero on failure.
 * NOTE(review): returns 1 (not a negative errno) for a NULL @handlers —
 * callers checking "ret < 0" will miss it; confirm intended convention.
 */
int tegra_hv_register_vm_err_hooks(struct vm_err_handlers *handlers)
{
	int ret;

	if (!handlers) {
		pr_err("%s: invalid error handlers\n", __func__);
		return 1;
	}

	/* All-NULL handler set means "tear down".
	 * NOTE(review): this unregisters unconditionally, even if the
	 * driver was never registered — confirm callers never do that.
	 */
	if (!handlers->fn_self_async && !handlers->fn_self_sync
			&& !handlers->fn_peer) {
		platform_driver_unregister(&tegra_hv_err_pdriver);
		return 0;
	}

	/* NOTE(review): peeks at the driver core's private ->p pointer to
	 * infer "already registered" — fragile across kernel versions;
	 * a local "registered" flag would be safer. Confirm.
	 */
	if (!tegra_hv_err_pdriver.driver.p) {
		/* Not registered/bound yet */
		ret = platform_driver_register(&tegra_hv_err_pdriver);
		if (ret) {
			pr_err("%s: failed to register driver. Err %d\n",
			       __func__, ret);
			return ret;
		}
	}

	/* Fan the handlers out to every device bound to this driver */
	ret = driver_for_each_device(&tegra_hv_err_pdriver.driver, NULL,
			handlers, tegra_hv_register_hooks_for_device);
	if (ret) {
		pr_err("%s: failed to attach driver. Err %d\n", __func__, ret);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(tegra_hv_register_vm_err_hooks);