aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2017-05-24 04:15:32 -0400
committerThomas Gleixner <tglx@linutronix.de>2017-05-26 04:10:43 -0400
commit0b2c2a71e6f07fb67e6f72817d39910f64d2e258 (patch)
treee80cee308ca16f262db0bec9aba200eb11dde9f4
parent1ddd45f8d76f0c15ec4e44073eeaaee6a806ee81 (diff)
PCI: Replace the racy recursion prevention
pci_call_probe() can be called recursively when a physical function is probed and the probing creates virtual functions, which are populated via pci_bus_add_device() which in turn can end up calling pci_call_probe() again. The code has an interesting way to prevent recursing into the workqueue code. That's accomplished by a check whether the current task runs already on the numa node which is associated with the device. While that works to prevent the recursion into the workqueue code, it's racy versus normal execution as there is no guarantee that the node does not vanish after the check. There is another issue with this code. It dereferences cpumask_of_node() unconditionally without checking whether the node is available. Make the detection reliable by: - Mark a probed device as 'is_probed' in pci_call_probe() - Check in pci_call_probe for a virtual function. If it's a virtual function and the associated physical function device is marked 'is_probed' then this is a recursive call, so the call can be invoked in the calling context. - Add a check whether the node is online before dereferencing it. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Ingo Molnar <mingo@kernel.org> Acked-by: Bjorn Helgaas <bhelgaas@google.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: linux-pci@vger.kernel.org Cc: Sebastian Siewior <bigeasy@linutronix.de> Cc: Steven Rostedt <rostedt@goodmis.org> Link: http://lkml.kernel.org/r/20170524081548.771457199@linutronix.de
-rw-r--r--drivers/pci/pci-driver.c47
-rw-r--r--include/linux/pci.h1
2 files changed, 26 insertions, 22 deletions
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 5bf92fd983e5..fe6be6382505 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -320,10 +320,19 @@ static long local_pci_probe(void *_ddi)
320 return 0; 320 return 0;
321} 321}
322 322
323static bool pci_physfn_is_probed(struct pci_dev *dev)
324{
325#ifdef CONFIG_PCI_IOV
326 return dev->is_virtfn && dev->physfn->is_probed;
327#else
328 return false;
329#endif
330}
331
323static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, 332static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
324 const struct pci_device_id *id) 333 const struct pci_device_id *id)
325{ 334{
326 int error, node; 335 int error, node, cpu;
327 struct drv_dev_and_id ddi = { drv, dev, id }; 336 struct drv_dev_and_id ddi = { drv, dev, id };
328 337
329 /* 338 /*
@@ -332,33 +341,27 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
332 * on the right node. 341 * on the right node.
333 */ 342 */
334 node = dev_to_node(&dev->dev); 343 node = dev_to_node(&dev->dev);
344 dev->is_probed = 1;
345
346 cpu_hotplug_disable();
335 347
336 /* 348 /*
337 * On NUMA systems, we are likely to call a PF probe function using 349 * Prevent nesting work_on_cpu() for the case where a Virtual Function
338 * work_on_cpu(). If that probe calls pci_enable_sriov() (which 350 * device is probed from work_on_cpu() of the Physical device.
339 * adds the VF devices via pci_bus_add_device()), we may re-enter
340 * this function to call the VF probe function. Calling
341 * work_on_cpu() again will cause a lockdep warning. Since VFs are
342 * always on the same node as the PF, we can work around this by
343 * avoiding work_on_cpu() when we're already on the correct node.
344 *
345 * Preemption is enabled, so it's theoretically unsafe to use
346 * numa_node_id(), but even if we run the probe function on the
347 * wrong node, it should be functionally correct.
348 */ 351 */
349 if (node >= 0 && node != numa_node_id()) { 352 if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
350 int cpu; 353 pci_physfn_is_probed(dev))
351 354 cpu = nr_cpu_ids;
352 cpu_hotplug_disable(); 355 else
353 cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); 356 cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
354 if (cpu < nr_cpu_ids) 357
355 error = work_on_cpu(cpu, local_pci_probe, &ddi); 358 if (cpu < nr_cpu_ids)
356 else 359 error = work_on_cpu(cpu, local_pci_probe, &ddi);
357 error = local_pci_probe(&ddi); 360 else
358 cpu_hotplug_enable();
359 } else
360 error = local_pci_probe(&ddi); 361 error = local_pci_probe(&ddi);
361 362
363 dev->is_probed = 0;
364 cpu_hotplug_enable();
362 return error; 365 return error;
363} 366}
364 367
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b77429..5026f2ae86db 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -371,6 +371,7 @@ struct pci_dev {
371 unsigned int irq_managed:1; 371 unsigned int irq_managed:1;
372 unsigned int has_secondary_link:1; 372 unsigned int has_secondary_link:1;
373 unsigned int non_compliant_bars:1; /* broken BARs; ignore them */ 373 unsigned int non_compliant_bars:1; /* broken BARs; ignore them */
374 unsigned int is_probed:1; /* device probing in progress */
374 pci_dev_flags_t dev_flags; 375 pci_dev_flags_t dev_flags;
375 atomic_t enable_cnt; /* pci_enable_device has been called */ 376 atomic_t enable_cnt; /* pci_enable_device has been called */
376 377