aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jack Morgenstein <jackm@dev.mellanox.co.il> 2009-09-05 23:36:16 -0400
committer: Roland Dreier <rolandd@cisco.com> 2009-09-05 23:36:16 -0400
commit: d84106477733cb155c5dcaea664ddf120bf69eb7 (patch)
tree: 38c9750fe03fa0df76cf4153adfc56aa6d9d6dd2
parent: d94a86890137fabcc97eaa324bfef3f1827744c8 (diff)
IB/mthca: Don't allow userspace open while recovering from catastrophic error
Userspace apps are supposed to release all ib device resources if they receive a fatal async event (IBV_EVENT_DEVICE_FATAL). However, the app has no way of knowing when the device has come back up, except to repeatedly attempt ibv_open_device() until it succeeds. Currently, there is no protection against the open succeeding while the device is being removed following the fatal event. In this case, the open will succeed, but as a result the device waits in the middle of its removal until the new app releases its resources -- and the new app will not do so, since the open succeeded at a point following the fatal event generation. This patch adds an "active" flag to the device. The active flag is set to false (in the fatal event flow) before the "fatal" event is generated, so any subsequent ibv_open_device() call to the device will fail until the device comes back up, thus preventing the above deadlock. Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_catas.c 1
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_dev.h 1
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_main.c 2
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.c 3
4 files changed, 7 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index 65ad359fdf16..056b2a4c6970 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -88,6 +88,7 @@ static void handle_catas(struct mthca_dev *dev)
88 event.device = &dev->ib_dev; 88 event.device = &dev->ib_dev;
89 event.event = IB_EVENT_DEVICE_FATAL; 89 event.event = IB_EVENT_DEVICE_FATAL;
90 event.element.port_num = 0; 90 event.element.port_num = 0;
91 dev->active = false;
91 92
92 ib_dispatch_event(&event); 93 ib_dispatch_event(&event);
93 94
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 9ef611f6dd36..7e6a6d64ad4e 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -357,6 +357,7 @@ struct mthca_dev {
357 struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; 357 struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
358 spinlock_t sm_lock; 358 spinlock_t sm_lock;
359 u8 rate[MTHCA_MAX_PORTS]; 359 u8 rate[MTHCA_MAX_PORTS];
360 bool active;
360}; 361};
361 362
362#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG 363#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 13da9f1d24c0..518cc540e516 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1116,6 +1116,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1116 pci_set_drvdata(pdev, mdev); 1116 pci_set_drvdata(pdev, mdev);
1117 mdev->hca_type = hca_type; 1117 mdev->hca_type = hca_type;
1118 1118
1119 mdev->active = true;
1120
1119 return 0; 1121 return 0;
1120 1122
1121err_unregister: 1123err_unregister:
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 87ad889e367b..bcf7a4014820 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -334,6 +334,9 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
334 struct mthca_ucontext *context; 334 struct mthca_ucontext *context;
335 int err; 335 int err;
336 336
337 if (!(to_mdev(ibdev)->active))
338 return ERR_PTR(-EAGAIN);
339
337 memset(&uresp, 0, sizeof uresp); 340 memset(&uresp, 0, sizeof uresp);
338 341
339 uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; 342 uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;