author    Sagi Grimberg <sagi@grimberg.me>    2016-05-19 08:24:55 -0400
committer Sagi Grimberg <sagi@grimberg.me>    2016-08-04 10:43:06 -0400
commit    d8f7750a08968b105056328652d2c332bdfa062d (patch)
tree      8932ca6a801f85efffeb7825163e7627c139a978
parent    45862ebcc4883b1b6bc0701cd15cb2b68b140c5d (diff)

nvmet-rdma: Correctly handle RDMA device hot removal
When configuring a device-attached listener, we may see device
removal events. In this case we return a non-zero return code from
the cm event handler, which implicitly destroys the cm_id. It is
possible that the user will later remove this listener and thereby
trigger a second call to rdma_destroy_id on an already destroyed
cm_id -> BUG.

In addition, when a queue-bound (active session) cm_id generates a
DEVICE_REMOVAL event, we must guarantee that all resources are
cleaned up by the time we return from the event handler.

Introduce nvmet_rdma_device_removal which addresses (or at least
attempts to address) both scenarios.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--  drivers/nvme/target/rdma.c  87
1 file changed, 70 insertions(+), 17 deletions(-)
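
For readers unfamiliar with the rdma_cm callout contract this fix leans
on, here is a minimal hypothetical handler (illustrative only, not the
driver code; example_cm_handler is an invented name) showing why a
non-zero return must never be combined with a later explicit
rdma_destroy_id() on the same cm_id:

#include <rdma/rdma_cm.h>

/*
 * Sketch of the rdma_cm event-handler contract: returning non-zero
 * makes the RDMA CM core destroy the cm_id itself, so from that point
 * on nobody else may call rdma_destroy_id() on it -- a second destroy
 * is exactly the BUG the commit message describes.
 */
static int example_cm_handler(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		/* tear down our own resources here, then ... */
		return 1;	/* core calls rdma_destroy_id(cm_id) for us */
	default:
		return 0;	/* cm_id stays alive; we still own it */
	}
}
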
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e06d504bdf0c..48c811850c29 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
 	NVMET_RDMA_Q_CONNECTING,
 	NVMET_RDMA_Q_LIVE,
 	NVMET_RDMA_Q_DISCONNECTING,
+	NVMET_RDMA_IN_DEVICE_REMOVAL,
 };
 
 struct nvmet_rdma_queue {
@@ -984,7 +985,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
 	struct nvmet_rdma_device *dev = queue->dev;
 
 	nvmet_rdma_free_queue(queue);
-	rdma_destroy_id(cm_id);
+
+	if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+		rdma_destroy_id(cm_id);
+
 	kref_put(&dev->ref, nvmet_rdma_free_dev);
 }
 
@@ -1233,8 +1237,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 	switch (queue->state) {
 	case NVMET_RDMA_Q_CONNECTING:
 	case NVMET_RDMA_Q_LIVE:
-		disconnect = true;
 		queue->state = NVMET_RDMA_Q_DISCONNECTING;
+	case NVMET_RDMA_IN_DEVICE_REMOVAL:
+		disconnect = true;
 		break;
 	case NVMET_RDMA_Q_DISCONNECTING:
 		break;
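
Note that the relocated disconnect = true relies on an intentional
switch fall-through: CONNECTING and LIVE first move to DISCONNECTING
and then fall into the new IN_DEVICE_REMOVAL label. A condensed sketch
of the resulting logic (queue_needs_disconnect is a hypothetical helper
name; the enum is the one extended in the first hunk):

/* Illustrative condensation of the hunk above, not driver source. */
static bool queue_needs_disconnect(enum nvmet_rdma_queue_state *state)
{
	bool disconnect = false;

	switch (*state) {
	case NVMET_RDMA_Q_CONNECTING:
	case NVMET_RDMA_Q_LIVE:
		*state = NVMET_RDMA_Q_DISCONNECTING;
		/* intentional fall-through */
	case NVMET_RDMA_IN_DEVICE_REMOVAL:
		disconnect = true;
		break;
	default:	/* NVMET_RDMA_Q_DISCONNECTING and the rest */
		break;
	}

	return disconnect;
}
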
@@ -1272,6 +1277,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 	schedule_work(&queue->release_work);
 }
 
+/**
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id:	rdma_cm id (listener or queue cm_id)
+ * @queue:	nvmet rdma queue (cm id qp_context, NULL for a listener)
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug, so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device-bound listener cm_id (in which case
+ * queue will be null).
+ *
+ * We claim ownership of destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for the port
+ * we nullify the priv to prevent double cm_id destruction, and have
+ * the cm_id destroyed implicitly by returning a non-zero rc to the callout.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+		struct nvmet_rdma_queue *queue)
+{
+	unsigned long flags;
+
+	if (!queue) {
+		struct nvmet_port *port = cm_id->context;
+
+		/*
+		 * This is a listener cm_id. Make sure that
+		 * future remove_port won't invoke a double
+		 * cm_id destroy. Use atomic xchg to make sure
+		 * we don't compete with remove_port.
+		 */
+		if (xchg(&port->priv, NULL) != cm_id)
+			return 0;
+	} else {
+		/*
+		 * This is a queue cm_id. Make sure that
+		 * release queue will not destroy the cm_id
+		 * and schedule all ctrl queues removal (only
+		 * if the queue is not disconnecting already).
+		 */
+		spin_lock_irqsave(&queue->state_lock, flags);
+		if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+			queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+		spin_unlock_irqrestore(&queue->state_lock, flags);
+		nvmet_rdma_queue_disconnect(queue);
+		flush_scheduled_work();
+	}
+
+	/*
+	 * We need to return 1 so that the core will destroy
+	 * its own ID. What a great API design..
+	 */
+	return 1;
+}
+
 static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
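
The xchg() on port->priv above is the heart of the listener-side fix,
and it pairs with the identical xchg() in nvmet_rdma_remove_port()
below: whichever path swaps the pointer out first owns destruction of
the cm_id. A rough userspace analogy of that exactly-once handoff (C11
atomics; every name here is invented for illustration):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_cm_id { int unused; };

/* stands in for port->priv */
static _Atomic(struct fake_cm_id *) listener;

static void destroy_once(const char *who)
{
	/* atomically claim ownership; only one caller sees non-NULL */
	struct fake_cm_id *id = atomic_exchange(&listener, NULL);

	if (id) {
		printf("%s destroys the cm_id\n", who);
		free(id);
	} else {
		printf("%s backs off: already destroyed\n", who);
	}
}

int main(void)
{
	atomic_store(&listener, malloc(sizeof(struct fake_cm_id)));

	/* no matter which order these run in, free() happens exactly once */
	destroy_once("DEVICE_REMOVAL handler");
	destroy_once("remove_port");
	return 0;
}
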
@@ -1294,20 +1355,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		break;
 	case RDMA_CM_EVENT_ADDR_CHANGE:
 	case RDMA_CM_EVENT_DISCONNECTED:
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		/*
-		 * We can get the device removal callback even for a
-		 * CM ID that we aren't actually using. In that case
-		 * the context pointer is NULL, so we shouldn't try
-		 * to disconnect a non-existing queue. But we also
-		 * need to return 1 so that the core will destroy
-		 * it's own ID. What a great API design..
-		 */
-		if (queue)
-			nvmet_rdma_queue_disconnect(queue);
-		else
-			ret = 1;
+		nvmet_rdma_queue_disconnect(queue);
+		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		ret = nvmet_rdma_device_removal(cm_id, queue);
 		break;
 	case RDMA_CM_EVENT_REJECTED:
 	case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1448,10 @@ out_destroy_id:
 
 static void nvmet_rdma_remove_port(struct nvmet_port *port)
 {
-	struct rdma_cm_id *cm_id = port->priv;
+	struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
 
-	rdma_destroy_id(cm_id);
+	if (cm_id)
+		rdma_destroy_id(cm_id);
 }
 
 static struct nvmet_fabrics_ops nvmet_rdma_ops = {