aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andrew Donnellan <andrew.donnellan@au1.ibm.com> 2017-02-05 20:07:17 -0500
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-03-14 22:02:42 -0400
commit: 53d43706f2ba5cd805093d21d69fd700584a71ab (patch)
tree: 97688aad50611bf48e30e5be11602ab6a7f428a4
parent: 411d0b0ced692dd2c0d7c10514ca8b923d8fa0f8 (diff)
cxl: fix nested locking hang during EEH hotplug
commit 171ed0fcd8966d82c45376f1434678e7b9d4d9b1 upstream.

Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured") introduced a rwsem to fix an invalid memory access that occurred when someone attempts to access the config space of an AFU on a vPHB whilst the AFU is deconfigured, such as during EEH recovery.

It turns out that it's possible to run into a nested locking issue when EEH recovery fails and a full device hotplug is required. cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on configured_rwsem. When EEH recovery fails, the EEH code calls pci_hp_remove_devices() to remove the device, which in turn calls cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries to grab the writer lock that's already held.

Standard rwsem semantics don't express what we really want to do here and don't allow for nested locking. Fix this by replacing the rwsem with an atomic_t which we can control more finely. Allow the AFU to be locked multiple times so long as there are no readers.

Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured")
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--  drivers/misc/cxl/cxl.h   |  5
-rw-r--r--  drivers/misc/cxl/main.c  |  3
-rw-r--r--  drivers/misc/cxl/pci.c   | 11
-rw-r--r--  drivers/misc/cxl/vphb.c  | 18
4 files changed, 27 insertions, 10 deletions
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 379c463e0c4f..52ee3da85366 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -418,8 +418,9 @@ struct cxl_afu {
418 struct dentry *debugfs; 418 struct dentry *debugfs;
419 struct mutex contexts_lock; 419 struct mutex contexts_lock;
420 spinlock_t afu_cntl_lock; 420 spinlock_t afu_cntl_lock;
421 /* Used to block access to AFU config space while deconfigured */ 421
422 struct rw_semaphore configured_rwsem; 422 /* -1: AFU deconfigured/locked, >= 0: number of readers */
423 atomic_t configured_state;
423 424
424 /* AFU error buffer fields and bin attribute for sysfs */ 425 /* AFU error buffer fields and bin attribute for sysfs */
425 u64 eb_len, eb_offset; 426 u64 eb_len, eb_offset;
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 2a6bf1d0a3a4..cc1706a92ace 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -268,8 +268,7 @@ struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
268 idr_init(&afu->contexts_idr); 268 idr_init(&afu->contexts_idr);
269 mutex_init(&afu->contexts_lock); 269 mutex_init(&afu->contexts_lock);
270 spin_lock_init(&afu->afu_cntl_lock); 270 spin_lock_init(&afu->afu_cntl_lock);
271 init_rwsem(&afu->configured_rwsem); 271 atomic_set(&afu->configured_state, -1);
272 down_write(&afu->configured_rwsem);
273 afu->prefault_mode = CXL_PREFAULT_NONE; 272 afu->prefault_mode = CXL_PREFAULT_NONE;
274 afu->irqs_max = afu->adapter->user_irqs; 273 afu->irqs_max = afu->adapter->user_irqs;
275 274
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index b2ff10891775..dd99b06e121a 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
1129 if ((rc = cxl_native_register_psl_irq(afu))) 1129 if ((rc = cxl_native_register_psl_irq(afu)))
1130 goto err2; 1130 goto err2;
1131 1131
1132 up_write(&afu->configured_rwsem); 1132 atomic_set(&afu->configured_state, 0);
1133 return 0; 1133 return 0;
1134 1134
1135err2: 1135err2:
@@ -1142,7 +1142,14 @@ err1:
1142 1142
1143static void pci_deconfigure_afu(struct cxl_afu *afu) 1143static void pci_deconfigure_afu(struct cxl_afu *afu)
1144{ 1144{
1145 down_write(&afu->configured_rwsem); 1145 /*
1146 * It's okay to deconfigure when AFU is already locked, otherwise wait
1147 * until there are no readers
1148 */
1149 if (atomic_read(&afu->configured_state) != -1) {
1150 while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1)
1151 schedule();
1152 }
1146 cxl_native_release_psl_irq(afu); 1153 cxl_native_release_psl_irq(afu);
1147 if (afu->adapter->native->sl_ops->release_serr_irq) 1154 if (afu->adapter->native->sl_ops->release_serr_irq)
1148 afu->adapter->native->sl_ops->release_serr_irq(afu); 1155 afu->adapter->native->sl_ops->release_serr_irq(afu);
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index 639a343b7836..512a4897dbf6 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -83,6 +83,16 @@ static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
83 return phb ? phb->private_data : NULL; 83 return phb ? phb->private_data : NULL;
84} 84}
85 85
86static void cxl_afu_configured_put(struct cxl_afu *afu)
87{
88 atomic_dec_if_positive(&afu->configured_state);
89}
90
91static bool cxl_afu_configured_get(struct cxl_afu *afu)
92{
93 return atomic_inc_unless_negative(&afu->configured_state);
94}
95
86static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, 96static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
87 struct cxl_afu *afu, int *_record) 97 struct cxl_afu *afu, int *_record)
88{ 98{
@@ -107,7 +117,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
107 117
108 afu = pci_bus_to_afu(bus); 118 afu = pci_bus_to_afu(bus);
109 /* Grab a reader lock on afu. */ 119 /* Grab a reader lock on afu. */
110 if (afu == NULL || !down_read_trylock(&afu->configured_rwsem)) 120 if (afu == NULL || !cxl_afu_configured_get(afu))
111 return PCIBIOS_DEVICE_NOT_FOUND; 121 return PCIBIOS_DEVICE_NOT_FOUND;
112 122
113 rc = cxl_pcie_config_info(bus, devfn, afu, &record); 123 rc = cxl_pcie_config_info(bus, devfn, afu, &record);
@@ -132,7 +142,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
132 } 142 }
133 143
134out: 144out:
135 up_read(&afu->configured_rwsem); 145 cxl_afu_configured_put(afu);
136 return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; 146 return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
137} 147}
138 148
@@ -144,7 +154,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
144 154
145 afu = pci_bus_to_afu(bus); 155 afu = pci_bus_to_afu(bus);
146 /* Grab a reader lock on afu. */ 156 /* Grab a reader lock on afu. */
147 if (afu == NULL || !down_read_trylock(&afu->configured_rwsem)) 157 if (afu == NULL || !cxl_afu_configured_get(afu))
148 return PCIBIOS_DEVICE_NOT_FOUND; 158 return PCIBIOS_DEVICE_NOT_FOUND;
149 159
150 rc = cxl_pcie_config_info(bus, devfn, afu, &record); 160 rc = cxl_pcie_config_info(bus, devfn, afu, &record);
@@ -166,7 +176,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
166 } 176 }
167 177
168out: 178out:
169 up_read(&afu->configured_rwsem); 179 cxl_afu_configured_put(afu);
170 return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL; 180 return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
171} 181}
172 182