From d23a4b3fd6ef70b80411b39b8c8bc548a219ce70 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sun, 29 Jan 2012 13:57:43 -0600 Subject: rbd: fix safety of rbd_put_client() The rbd_client structure uses a kref to arrange for cleaning up and freeing an instance when its last reference is dropped. The cleanup routine is rbd_client_release(), and one of the things it does is delete the rbd_client from rbd_client_list. It acquires node_lock to do so, but the way it is done is still not safe. The problem is that when attempting to reuse an existing rbd_client, the structure found might already be in the process of getting destroyed and cleaned up. Here's the scenario, with "CLIENT" representing an existing rbd_client that's involved in the race: Thread on CPU A | Thread on CPU B --------------- | --------------- rbd_put_client(CLIENT) | rbd_get_client() kref_put() | (acquires node_lock) kref->refcount becomes 0 | __rbd_client_find() returns CLIENT calls rbd_client_release() | kref_get(&CLIENT->kref); | (releases node_lock) (acquires node_lock) | deletes CLIENT from list | ...and starts using CLIENT... (releases node_lock) | and frees CLIENT | <-- but CLIENT gets freed here Fix this by having rbd_put_client() acquire node_lock. The result could still be improved, but at least it avoids this problem. Signed-off-by: Alex Elder Signed-off-by: Sage Weil --- drivers/block/rbd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7d8f8ddb3359..7f40cb4553c9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -407,15 +407,15 @@ done_err: /* * Destroy ceph client + * + * Caller must hold node_lock. */ static void rbd_client_release(struct kref *kref) { struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); dout("rbd_release_client %p\n", rbdc); - spin_lock(&node_lock); list_del(&rbdc->node); - spin_unlock(&node_lock); ceph_destroy_client(rbdc->client); kfree(rbdc->rbd_opts); @@ -428,7 +428,9 @@ static void rbd_client_release(struct kref *kref) */ static void rbd_put_client(struct rbd_device *rbd_dev) { + spin_lock(&node_lock); kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); + spin_unlock(&node_lock); rbd_dev->rbd_client = NULL; rbd_dev->client = NULL; } -- cgit v1.2.2