diff options
author | Christophe Lombard <clombard@linux.vnet.ibm.com> | 2016-03-04 06:26:41 -0500 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2016-03-09 07:40:00 -0500 |
commit | 0d400f77c19e8d2606f8194846bcf18ebdc9df2a (patch) | |
tree | 307436d00d93f55e2aa27d2233c91346c14e8961 /drivers/misc/cxl/native.c | |
parent | d601ea918b878582e60b773f2f943d8d292b2abf (diff) |
cxl: Adapter failure handling
Check the AFU state whenever an API is called. The hypervisor may
issue a reset of the adapter when it detects a fault. When that happens,
it launches an error recovery which will move the AFU either to a
permanent failure state or to the disabled state.
If the AFU is found to be disabled, detach all existing contexts from
it before issuing an AFU reset to re-enable it.
Before detaching contexts, notify any kernel driver through the EEH
callbacks of the AFU pci device.
Co-authored-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Reviewed-by: Manoj Kumar <manoj@linux.vnet.ibm.com>
Acked-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'drivers/misc/cxl/native.c')
-rw-r--r-- | drivers/misc/cxl/native.c | 32 |
1 files changed, 16 insertions, 16 deletions
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index e564ae657584..387fcbdf9793 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c | |||
@@ -42,7 +42,7 @@ static int afu_control(struct cxl_afu *afu, u64 command, | |||
42 | goto out; | 42 | goto out; |
43 | } | 43 | } |
44 | 44 | ||
45 | if (!cxl_ops->link_ok(afu->adapter)) { | 45 | if (!cxl_ops->link_ok(afu->adapter, afu)) { |
46 | afu->enabled = enabled; | 46 | afu->enabled = enabled; |
47 | rc = -EIO; | 47 | rc = -EIO; |
48 | goto out; | 48 | goto out; |
@@ -92,7 +92,7 @@ static int native_afu_reset(struct cxl_afu *afu) | |||
92 | 92 | ||
93 | static int native_afu_check_and_enable(struct cxl_afu *afu) | 93 | static int native_afu_check_and_enable(struct cxl_afu *afu) |
94 | { | 94 | { |
95 | if (!cxl_ops->link_ok(afu->adapter)) { | 95 | if (!cxl_ops->link_ok(afu->adapter, afu)) { |
96 | WARN(1, "Refusing to enable afu while link down!\n"); | 96 | WARN(1, "Refusing to enable afu while link down!\n"); |
97 | return -EIO; | 97 | return -EIO; |
98 | } | 98 | } |
@@ -114,7 +114,7 @@ int cxl_psl_purge(struct cxl_afu *afu) | |||
114 | 114 | ||
115 | pr_devel("PSL purge request\n"); | 115 | pr_devel("PSL purge request\n"); |
116 | 116 | ||
117 | if (!cxl_ops->link_ok(afu->adapter)) { | 117 | if (!cxl_ops->link_ok(afu->adapter, afu)) { |
118 | dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); | 118 | dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); |
119 | rc = -EIO; | 119 | rc = -EIO; |
120 | goto out; | 120 | goto out; |
@@ -136,7 +136,7 @@ int cxl_psl_purge(struct cxl_afu *afu) | |||
136 | rc = -EBUSY; | 136 | rc = -EBUSY; |
137 | goto out; | 137 | goto out; |
138 | } | 138 | } |
139 | if (!cxl_ops->link_ok(afu->adapter)) { | 139 | if (!cxl_ops->link_ok(afu->adapter, afu)) { |
140 | rc = -EIO; | 140 | rc = -EIO; |
141 | goto out; | 141 | goto out; |
142 | } | 142 | } |
@@ -250,7 +250,7 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) | |||
250 | dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); | 250 | dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); |
251 | return -EBUSY; | 251 | return -EBUSY; |
252 | } | 252 | } |
253 | if (!cxl_ops->link_ok(adapter)) | 253 | if (!cxl_ops->link_ok(adapter, NULL)) |
254 | return -EIO; | 254 | return -EIO; |
255 | cpu_relax(); | 255 | cpu_relax(); |
256 | } | 256 | } |
@@ -261,7 +261,7 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) | |||
261 | dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); | 261 | dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); |
262 | return -EBUSY; | 262 | return -EBUSY; |
263 | } | 263 | } |
264 | if (!cxl_ops->link_ok(adapter)) | 264 | if (!cxl_ops->link_ok(adapter, NULL)) |
265 | return -EIO; | 265 | return -EIO; |
266 | cpu_relax(); | 266 | cpu_relax(); |
267 | } | 267 | } |
@@ -302,7 +302,7 @@ static void slb_invalid(struct cxl_context *ctx) | |||
302 | cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); | 302 | cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); |
303 | 303 | ||
304 | while (1) { | 304 | while (1) { |
305 | if (!cxl_ops->link_ok(adapter)) | 305 | if (!cxl_ops->link_ok(adapter, NULL)) |
306 | break; | 306 | break; |
307 | slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); | 307 | slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); |
308 | if (!(slbia & CXL_TLB_SLB_P)) | 308 | if (!(slbia & CXL_TLB_SLB_P)) |
@@ -333,7 +333,7 @@ static int do_process_element_cmd(struct cxl_context *ctx, | |||
333 | rc = -EBUSY; | 333 | rc = -EBUSY; |
334 | goto out; | 334 | goto out; |
335 | } | 335 | } |
336 | if (!cxl_ops->link_ok(ctx->afu->adapter)) { | 336 | if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { |
337 | dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); | 337 | dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); |
338 | rc = -EIO; | 338 | rc = -EIO; |
339 | goto out; | 339 | goto out; |
@@ -389,7 +389,7 @@ static int terminate_process_element(struct cxl_context *ctx) | |||
389 | * should always succeed: it's not running if the hw has gone | 389 | * should always succeed: it's not running if the hw has gone |
390 | * away and is being reset. | 390 | * away and is being reset. |
391 | */ | 391 | */ |
392 | if (cxl_ops->link_ok(ctx->afu->adapter)) | 392 | if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) |
393 | rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, | 393 | rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, |
394 | CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); | 394 | CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); |
395 | ctx->elem->software_state = 0; /* Remove Valid bit */ | 395 | ctx->elem->software_state = 0; /* Remove Valid bit */ |
@@ -408,7 +408,7 @@ static int remove_process_element(struct cxl_context *ctx) | |||
408 | /* We could be asked to remove when the hw is down. Again, if | 408 | /* We could be asked to remove when the hw is down. Again, if |
409 | * the hw is down, the PE is gone, so we succeed. | 409 | * the hw is down, the PE is gone, so we succeed. |
410 | */ | 410 | */ |
411 | if (cxl_ops->link_ok(ctx->afu->adapter)) | 411 | if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) |
412 | rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); | 412 | rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); |
413 | 413 | ||
414 | if (!rc) | 414 | if (!rc) |
@@ -650,7 +650,7 @@ static int native_afu_activate_mode(struct cxl_afu *afu, int mode) | |||
650 | if (!(mode & afu->modes_supported)) | 650 | if (!(mode & afu->modes_supported)) |
651 | return -EINVAL; | 651 | return -EINVAL; |
652 | 652 | ||
653 | if (!cxl_ops->link_ok(afu->adapter)) { | 653 | if (!cxl_ops->link_ok(afu->adapter, afu)) { |
654 | WARN(1, "Device link is down, refusing to activate!\n"); | 654 | WARN(1, "Device link is down, refusing to activate!\n"); |
655 | return -EIO; | 655 | return -EIO; |
656 | } | 656 | } |
@@ -666,7 +666,7 @@ static int native_afu_activate_mode(struct cxl_afu *afu, int mode) | |||
666 | static int native_attach_process(struct cxl_context *ctx, bool kernel, | 666 | static int native_attach_process(struct cxl_context *ctx, bool kernel, |
667 | u64 wed, u64 amr) | 667 | u64 wed, u64 amr) |
668 | { | 668 | { |
669 | if (!cxl_ops->link_ok(ctx->afu->adapter)) { | 669 | if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { |
670 | WARN(1, "Device link is down, refusing to attach process!\n"); | 670 | WARN(1, "Device link is down, refusing to attach process!\n"); |
671 | return -EIO; | 671 | return -EIO; |
672 | } | 672 | } |
@@ -718,7 +718,7 @@ static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info) | |||
718 | /* If the adapter has gone away, we can't get any meaningful | 718 | /* If the adapter has gone away, we can't get any meaningful |
719 | * information. | 719 | * information. |
720 | */ | 720 | */ |
721 | if (!cxl_ops->link_ok(afu->adapter)) | 721 | if (!cxl_ops->link_ok(afu->adapter, afu)) |
722 | return -EIO; | 722 | return -EIO; |
723 | 723 | ||
724 | info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); | 724 | info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); |
@@ -975,7 +975,7 @@ static bool native_support_attributes(const char *attr_name, | |||
975 | 975 | ||
976 | static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out) | 976 | static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out) |
977 | { | 977 | { |
978 | if (unlikely(!cxl_ops->link_ok(afu->adapter))) | 978 | if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) |
979 | return -EIO; | 979 | return -EIO; |
980 | if (unlikely(off >= afu->crs_len)) | 980 | if (unlikely(off >= afu->crs_len)) |
981 | return -ERANGE; | 981 | return -ERANGE; |
@@ -986,7 +986,7 @@ static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out) | |||
986 | 986 | ||
987 | static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out) | 987 | static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out) |
988 | { | 988 | { |
989 | if (unlikely(!cxl_ops->link_ok(afu->adapter))) | 989 | if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) |
990 | return -EIO; | 990 | return -EIO; |
991 | if (unlikely(off >= afu->crs_len)) | 991 | if (unlikely(off >= afu->crs_len)) |
992 | return -ERANGE; | 992 | return -ERANGE; |
@@ -1021,7 +1021,7 @@ static int native_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off, u8 *out) | |||
1021 | 1021 | ||
1022 | static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) | 1022 | static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) |
1023 | { | 1023 | { |
1024 | if (unlikely(!cxl_ops->link_ok(afu->adapter))) | 1024 | if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) |
1025 | return -EIO; | 1025 | return -EIO; |
1026 | if (unlikely(off >= afu->crs_len)) | 1026 | if (unlikely(off >= afu->crs_len)) |
1027 | return -ERANGE; | 1027 | return -ERANGE; |