author    Sagi Grimberg <sagi@grimberg.me>    2016-05-19 08:24:55 -0400
committer Sagi Grimberg <sagi@grimberg.me>    2016-08-04 10:43:06 -0400
commit    d8f7750a08968b105056328652d2c332bdfa062d (patch)
tree      8932ca6a801f85efffeb7825163e7627c139a978
parent    45862ebcc4883b1b6bc0701cd15cb2b68b140c5d (diff)

nvmet-rdma: Correctly handle RDMA device hot removal
When configuring a device-attached listener, we may see device
removal events. In this case we return a non-zero return code from
the cm event handler, which implicitly destroys the cm_id. It is
possible that the user will later remove this listener and thereby
trigger a second call to rdma_destroy_id on an already destroyed
cm_id -> BUG.

In addition, when a queue-bound (active session) cm_id generates a
DEVICE_REMOVAL event, we must guarantee that all resources are
cleaned up by the time we return from the event handler.

Introduce nvmet_rdma_device_removal which addresses (or at least
attempts to address) both scenarios.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--  drivers/nvme/target/rdma.c  87
1 file changed, 70 insertions(+), 17 deletions(-)
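
For readers unfamiliar with the rdma_cm callout contract this fix leans
on, here is a minimal hypothetical handler (illustrative only, not the
driver code; example_cm_handler is an invented name) showing why a
non-zero return must never be combined with a later explicit
rdma_destroy_id() on the same cm_id:

#include <rdma/rdma_cm.h>

/*
 * Sketch of the rdma_cm event-handler contract: returning non-zero
 * makes the RDMA CM core destroy the cm_id itself, so from that point
 * on nobody else may call rdma_destroy_id() on it -- a second destroy
 * is exactly the BUG the commit message describes.
 */
static int example_cm_handler(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		/* tear down our own resources here, then ... */
		return 1;	/* core calls rdma_destroy_id(cm_id) for us */
	default:
		return 0;	/* cm_id stays alive; we still own it */
	}
}
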
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e06d504bdf0c..48c811850c29 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
 	NVMET_RDMA_Q_CONNECTING,
 	NVMET_RDMA_Q_LIVE,
 	NVMET_RDMA_Q_DISCONNECTING,
+	NVMET_RDMA_IN_DEVICE_REMOVAL,
 };
 
 struct nvmet_rdma_queue {
@@ -984,7 +985,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
 	struct nvmet_rdma_device *dev = queue->dev;
 
 	nvmet_rdma_free_queue(queue);
-	rdma_destroy_id(cm_id);
+
+	if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+		rdma_destroy_id(cm_id);
+
 	kref_put(&dev->ref, nvmet_rdma_free_dev);
 }
 
@@ -1233,8 +1237,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 	switch (queue->state) {
 	case NVMET_RDMA_Q_CONNECTING:
 	case NVMET_RDMA_Q_LIVE:
-		disconnect = true;
 		queue->state = NVMET_RDMA_Q_DISCONNECTING;
+	case NVMET_RDMA_IN_DEVICE_REMOVAL:
+		disconnect = true;
 		break;
 	case NVMET_RDMA_Q_DISCONNECTING:
 		break;
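
Note that the relocated disconnect = true relies on an intentional
switch fall-through: CONNECTING and LIVE first move to DISCONNECTING
and then fall into the new IN_DEVICE_REMOVAL label. A condensed sketch
of the resulting logic (queue_needs_disconnect is a hypothetical helper
name; the enum is the one extended in the first hunk):

/* Illustrative condensation of the hunk above, not driver source. */
static bool queue_needs_disconnect(enum nvmet_rdma_queue_state *state)
{
	bool disconnect = false;

	switch (*state) {
	case NVMET_RDMA_Q_CONNECTING:
	case NVMET_RDMA_Q_LIVE:
		*state = NVMET_RDMA_Q_DISCONNECTING;
		/* intentional fall-through */
	case NVMET_RDMA_IN_DEVICE_REMOVAL:
		disconnect = true;
		break;
	default:	/* NVMET_RDMA_Q_DISCONNECTING and the rest */
		break;
	}

	return disconnect;
}
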
@@ -1272,6 +1277,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 	schedule_work(&queue->release_work);
 }
 
+/**
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id:	rdma_cm id (listener or queue cm_id)
+ * @queue:	nvmet rdma queue (cm id qp_context, NULL for a listener)
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug, so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device-bound listener cm_id (in which case
+ * queue will be null).
+ *
+ * We claim ownership of destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for the port
+ * we nullify the priv to prevent double cm_id destruction, and have
+ * the cm_id destroyed implicitly by returning a non-zero rc to the callout.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+		struct nvmet_rdma_queue *queue)
+{
+	unsigned long flags;
+
+	if (!queue) {
+		struct nvmet_port *port = cm_id->context;
+
+		/*
+		 * This is a listener cm_id. Make sure that
+		 * future remove_port won't invoke a double
+		 * cm_id destroy. Use atomic xchg to make sure
+		 * we don't compete with remove_port.
+		 */
+		if (xchg(&port->priv, NULL) != cm_id)
+			return 0;
+	} else {
+		/*
+		 * This is a queue cm_id. Make sure that
+		 * release queue will not destroy the cm_id
+		 * and schedule all ctrl queues removal (only
+		 * if the queue is not disconnecting already).
+		 */
+		spin_lock_irqsave(&queue->state_lock, flags);
+		if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+			queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+		spin_unlock_irqrestore(&queue->state_lock, flags);
+		nvmet_rdma_queue_disconnect(queue);
+		flush_scheduled_work();
+	}
+
+	/*
+	 * We need to return 1 so that the core will destroy
+	 * its own ID. What a great API design..
+	 */
+	return 1;
+}
+
 static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
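
The xchg() on port->priv above is the heart of the listener-side fix,
and it pairs with the identical xchg() in nvmet_rdma_remove_port()
below: whichever path swaps the pointer out first owns destruction of
the cm_id. A rough userspace analogy of that exactly-once handoff (C11
atomics; every name here is invented for illustration):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_cm_id { int unused; };

/* stands in for port->priv */
static _Atomic(struct fake_cm_id *) listener;

static void destroy_once(const char *who)
{
	/* atomically claim ownership; only one caller sees non-NULL */
	struct fake_cm_id *id = atomic_exchange(&listener, NULL);

	if (id) {
		printf("%s destroys the cm_id\n", who);
		free(id);
	} else {
		printf("%s backs off: already destroyed\n", who);
	}
}

int main(void)
{
	atomic_store(&listener, malloc(sizeof(struct fake_cm_id)));

	/* no matter which order these run in, free() happens exactly once */
	destroy_once("DEVICE_REMOVAL handler");
	destroy_once("remove_port");
	return 0;
}
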
@@ -1294,20 +1355,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		break;
 	case RDMA_CM_EVENT_ADDR_CHANGE:
 	case RDMA_CM_EVENT_DISCONNECTED:
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		/*
-		 * We can get the device removal callback even for a
-		 * CM ID that we aren't actually using. In that case
-		 * the context pointer is NULL, so we shouldn't try
-		 * to disconnect a non-existing queue. But we also
-		 * need to return 1 so that the core will destroy
-		 * it's own ID. What a great API design..
-		 */
-		if (queue)
-			nvmet_rdma_queue_disconnect(queue);
-		else
-			ret = 1;
+		nvmet_rdma_queue_disconnect(queue);
+		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		ret = nvmet_rdma_device_removal(cm_id, queue);
 		break;
 	case RDMA_CM_EVENT_REJECTED:
 	case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1448,10 @@ out_destroy_id:
 
 static void nvmet_rdma_remove_port(struct nvmet_port *port)
 {
-	struct rdma_cm_id *cm_id = port->priv;
+	struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
 
-	rdma_destroy_id(cm_id);
+	if (cm_id)
+		rdma_destroy_id(cm_id);
 }
 
 static struct nvmet_fabrics_ops nvmet_rdma_ops = {