configfs: Silence lockdep on mkdir() and rmdir()

When attaching default groups (subdirs) of a new group (in mkdir() or in configfs_register()), configfs recursively takes the inodes' mutexes along the path from the parent of the new group to the default subdirs. This is needed to ensure that the VFS will not race with operations on these sub-dirs. This is safe for the following reasons: - the VFS allows one to lock first an inode and second one of its children (the lock subclasses for this pattern are respectively I_MUTEX_PARENT and I_MUTEX_CHILD); - from this rule any inode path can be recursively locked in descending order as long as it stays under a single mountpoint and does not follow symlinks. Unfortunately lockdep does not know (yet?) how to handle such recursion. I've tried to use Peter Zijlstra's lock_set_subclass() helper to upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know that we might recursively lock some of their descendants, but this usage does not seem to fit the purpose of lock_set_subclass() because it leads to several i_mutexes being locked with subclass I_MUTEX_PARENT by the same task. From inside configfs it is not possible to serialize this recursive locking with a top-level one, because mkdir() and rmdir() are already called with inodes locked by the VFS. So using some mutex_lock_nest_lock() is not an option. I am proposing two solutions: 1) one that wraps recursive mutex_lock()s with lockdep_off()/lockdep_on(). 2) (as suggested earlier by Peter Zijlstra) one that puts the i_mutexes recursively locked in different classes based on their depth from the top-level config_group created. This induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the nesting of configfs default groups whenever lockdep is activated but this limit looks reasonably high. Unfortunately, this also isolates VFS operations on configfs default groups from the others and thus lowers the chances to detect locking issues. Nobody likes solution 1), which I can understand. This patch implements solution 2).
However, lockdep is still not happy with configfs_depend_item(). The next patch reworks the locking of configfs_depend_item() and finally makes lockdep happy. [ Note: This hides a few locking interactions with the VFS from lockdep. That was my big concern, because we like lockdep's protection. However, the current state always dumps a spurious warning. The locking is correct, so I tell people to ignore the warning and that we'll keep our eyes on the locking to make sure it stays correct. With this patch, we eliminate the warning. We do lose some of the lockdep protections, but this only means that we still have to keep our eyes on the locking. We're going to do that anyway. -- Joel ] Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
author: Louis Rilling <louis.rilling@kerlabs.com> 2009-01-28 13:18:32 -0500
committer: Joel Becker <joel.becker@oracle.com> 2009-04-30 13:48:23 -0400
commit: e74cc06df3b05e2b2c1611a043f6e6dcadaab1eb (patch)
tree: c514c5f5149ce26ef9b44473bed18a7836540a38
parent: 3c48f23adada870db612a0dd3488605c4af5c0a5 (diff)
3 files changed, 131 insertions, 0 deletions
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 762d287123ca..da6061a6df40 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -39,6 +39,9 @@ struct configfs_dirent {
        umode_t                 s_mode;
        struct dentry           * s_dentry;
        struct iattr            * s_iattr;
+#ifdef CONFIG_LOCKDEP
+        int                     s_depth;
+#endif
 };
 #define CONFIGFS_ROOT           0x0001
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 05373db21a4e..d4d871fba21e 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -78,6 +78,92 @@ static const struct dentry_operations configfs_dentry_ops = {
        .d_delete       = configfs_d_delete,
 };
+#ifdef CONFIG_LOCKDEP
+/*
+ * Helpers to make lockdep happy with our recursive locking of default groups'
+ * inodes (see configfs_attach_group() and configfs_detach_group()).
+ * We put default groups i_mutexes in separate classes according to their depth
+ * from the youngest non-default group ancestor.
+ *
+ * For a non-default group A having default groups A/B, A/C, and A/C/D, default
+ * groups A/B and A/C will have their inode's mutex in class
+ * default_group_class[0], and default group A/C/D will be in
+ * default_group_class[1].
+ *
+ * The lock classes are declared and assigned in inode.c, according to the
+ * s_depth value.
+ * The s_depth value is initialized to -1, adjusted to >= 0 when attaching
+ * default groups, and reset to -1 when all default groups are attached. During
+ * attachment, if configfs_create() sees s_depth > 0, the lock class of the new
+ * inode's mutex is set to default_group_class[s_depth - 1].
+ */
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+        sd->s_depth = -1;
+}
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+                                          struct configfs_dirent *sd)
+{
+        int parent_depth = parent_sd->s_depth;
+        if (parent_depth >= 0)
+                sd->s_depth = parent_depth + 1;
+}
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+        /*
+         * item's i_mutex class is already setup, so s_depth is now only
+         * used to set new sub-directories s_depth, which is always done
+         * with item's i_mutex locked.
+         */
+        /*
+         *  sd->s_depth == -1 iff we are a non default group.
+         *  else (we are a default group) sd->s_depth > 0 (see
+         *  create_dir()).
+         */
+        if (sd->s_depth == -1)
+                /*
+                 * We are a non default group and we are going to create
+                 * default groups.
+                 */
+                sd->s_depth = 0;
+}
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+        /* We will not create default groups anymore. */
+        sd->s_depth = -1;
+}
+#else /* CONFIG_LOCKDEP */
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+}
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+                                          struct configfs_dirent *sd)
+{
+}
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+}
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+}
+#endif /* CONFIG_LOCKDEP */
 /*
 * Allocates a new configfs_dirent and links it to the parent configfs_dirent
 */
@@ -94,6 +180,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
        INIT_LIST_HEAD(&sd->s_links);
        INIT_LIST_HEAD(&sd->s_children);
        sd->s_element = element;
+        configfs_init_dirent_depth(sd);
        spin_lock(&configfs_dirent_lock);
        if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
                spin_unlock(&configfs_dirent_lock);
@@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
                error = configfs_make_dirent(p->d_fsdata, d, k, mode,
                                             CONFIGFS_DIR | CONFIGFS_USET_CREATING);
        if (!error) {
+                configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata);
                error = configfs_create(d, mode, init_dir);
                if (!error) {
                        inc_nlink(p->d_inode);
@@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item,
                 * error, as rmdir() would.
                 */
                mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+                configfs_adjust_dir_dirent_depth_before_populate(sd);
                ret = populate_groups(to_config_group(item));
                if (ret) {
                        configfs_detach_item(item);
                        dentry->d_inode->i_flags |= S_DEAD;
                }
+                configfs_adjust_dir_dirent_depth_after_populate(sd);
                mutex_unlock(&dentry->d_inode->i_mutex);
                if (ret)
                        d_delete(dentry);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 5d349d38e056..4921e7426d95 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -33,10 +33,15 @@
 #include <linux/backing-dev.h>
 #include <linux/capability.h>
 #include <linux/sched.h>
+#include <linux/lockdep.h>
 #include <linux/configfs.h>
 #include "configfs_internal.h"
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
+#endif
 extern struct super_block * configfs_sb;
 static const struct address_space_operations configfs_aops = {
@@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
        return inode;
 }
+#ifdef CONFIG_LOCKDEP
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+                                          struct inode *inode)
+{
+        int depth = sd->s_depth;
+        if (depth > 0) {
+                if (depth <= ARRAY_SIZE(default_group_class)) {
+                        lockdep_set_class(&inode->i_mutex,
+                                          &default_group_class[depth - 1]);
+                } else {
+                        /*
+                         * In practice the maximum level of locking depth is
+                         * already reached. Just inform about possible reasons.
+                         */
+                        printk(KERN_INFO "configfs: Too many levels of inodes"
+                               " for the locking correctness validator.\n");
+                        printk(KERN_INFO "Spurious warnings may appear.\n");
+                }
+        }
+}
+#else /* CONFIG_LOCKDEP */
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+                                          struct inode *inode)
+{
+}
+#endif /* CONFIG_LOCKDEP */
 int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
 {
        int error = 0;
@@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
                                        struct inode *p_inode = dentry->d_parent->d_inode;
                                        p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
                                }
+                                configfs_set_inode_lock_class(sd, inode);
                                goto Proceed;
                        }
                        else
author	Louis Rilling <louis.rilling@kerlabs.com>	2009-01-28 13:18:32 -0500
committer	Joel Becker <joel.becker@oracle.com>	2009-04-30 13:48:23 -0400
commit	e74cc06df3b05e2b2c1611a043f6e6dcadaab1eb (patch)
tree	c514c5f5149ce26ef9b44473bed18a7836540a38
parent	3c48f23adada870db612a0dd3488605c4af5c0a5 (diff)

diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 762d287123ca..da6061a6df40 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h
@@ -39,6 +39,9 @@ struct configfs_dirent {
39	umode_t s_mode;	39	umode_t s_mode;
40	struct dentry * s_dentry;	40	struct dentry * s_dentry;
41	struct iattr * s_iattr;	41	struct iattr * s_iattr;
		42	#ifdef CONFIG_LOCKDEP
		43	int s_depth;
		44	#endif
42	};	45	};
43		46
44	#define CONFIGFS_ROOT 0x0001	47	#define CONFIGFS_ROOT 0x0001


diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 05373db21a4e..d4d871fba21e 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c
@@ -78,6 +78,92 @@ static const struct dentry_operations configfs_dentry_ops = {
78	.d_delete = configfs_d_delete,	78	.d_delete = configfs_d_delete,
79	};	79	};
80		80
		81	#ifdef CONFIG_LOCKDEP
		82
		83	/*
		84	* Helpers to make lockdep happy with our recursive locking of default groups'
		85	* inodes (see configfs_attach_group() and configfs_detach_group()).
		86	* We put default groups i_mutexes in separate classes according to their depth
		87	* from the youngest non-default group ancestor.
		88	*
		89	* For a non-default group A having default groups A/B, A/C, and A/C/D, default
		90	* groups A/B and A/C will have their inode's mutex in class
		91	* default_group_class[0], and default group A/C/D will be in
		92	* default_group_class[1].
		93	*
		94	* The lock classes are declared and assigned in inode.c, according to the
		95	* s_depth value.
		96	* The s_depth value is initialized to -1, adjusted to >= 0 when attaching
		97	* default groups, and reset to -1 when all default groups are attached. During
		98	* attachment, if configfs_create() sees s_depth > 0, the lock class of the new
		99	* inode's mutex is set to default_group_class[s_depth - 1].
		100	*/
		101
		102	static void configfs_init_dirent_depth(struct configfs_dirent *sd)
		103	{
		104	sd->s_depth = -1;
		105	}
		106
		107	static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
		108	struct configfs_dirent *sd)
		109	{
		110	int parent_depth = parent_sd->s_depth;
		111
		112	if (parent_depth >= 0)
		113	sd->s_depth = parent_depth + 1;
		114	}
		115
		116	static void
		117	configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
		118	{
		119	/*
		120	* item's i_mutex class is already setup, so s_depth is now only
		121	* used to set new sub-directories s_depth, which is always done
		122	* with item's i_mutex locked.
		123	*/
		124	/*
		125	* sd->s_depth == -1 iff we are a non default group.
		126	* else (we are a default group) sd->s_depth > 0 (see
		127	* create_dir()).
		128	*/
		129	if (sd->s_depth == -1)
		130	/*
		131	* We are a non default group and we are going to create
		132	* default groups.
		133	*/
		134	sd->s_depth = 0;
		135	}
		136
		137	static void
		138	configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
		139	{
		140	/* We will not create default groups anymore. */
		141	sd->s_depth = -1;
		142	}
		143
		144	#else /* CONFIG_LOCKDEP */
		145
		146	static void configfs_init_dirent_depth(struct configfs_dirent *sd)
		147	{
		148	}
		149
		150	static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
		151	struct configfs_dirent *sd)
		152	{
		153	}
		154
		155	static void
		156	configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
		157	{
		158	}
		159
		160	static void
		161	configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
		162	{
		163	}
		164
		165	#endif /* CONFIG_LOCKDEP */
		166
81	/*	167	/*
82	* Allocates a new configfs_dirent and links it to the parent configfs_dirent	168	* Allocates a new configfs_dirent and links it to the parent configfs_dirent
83	*/	169	*/
@@ -94,6 +180,7 @@ static struct configfs_dirent configfs_new_dirent(struct configfs_dirent pare
94	INIT_LIST_HEAD(&sd->s_links);	180	INIT_LIST_HEAD(&sd->s_links);
95	INIT_LIST_HEAD(&sd->s_children);	181	INIT_LIST_HEAD(&sd->s_children);
96	sd->s_element = element;	182	sd->s_element = element;
		183	configfs_init_dirent_depth(sd);
97	spin_lock(&configfs_dirent_lock);	184	spin_lock(&configfs_dirent_lock);
98	if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {	185	if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
99	spin_unlock(&configfs_dirent_lock);	186	spin_unlock(&configfs_dirent_lock);
@@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
187	error = configfs_make_dirent(p->d_fsdata, d, k, mode,	274	error = configfs_make_dirent(p->d_fsdata, d, k, mode,
188	CONFIGFS_DIR \| CONFIGFS_USET_CREATING);	275	CONFIGFS_DIR \| CONFIGFS_USET_CREATING);
189	if (!error) {	276	if (!error) {
		277	configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata);
190	error = configfs_create(d, mode, init_dir);	278	error = configfs_create(d, mode, init_dir);
191	if (!error) {	279	if (!error) {
192	inc_nlink(p->d_inode);	280	inc_nlink(p->d_inode);
@@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item,
789	* error, as rmdir() would.	877	* error, as rmdir() would.
790	*/	878	*/
791	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);	879	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
		880	configfs_adjust_dir_dirent_depth_before_populate(sd);
792	ret = populate_groups(to_config_group(item));	881	ret = populate_groups(to_config_group(item));
793	if (ret) {	882	if (ret) {
794	configfs_detach_item(item);	883	configfs_detach_item(item);
795	dentry->d_inode->i_flags \|= S_DEAD;	884	dentry->d_inode->i_flags \|= S_DEAD;
796	}	885	}
		886	configfs_adjust_dir_dirent_depth_after_populate(sd);
797	mutex_unlock(&dentry->d_inode->i_mutex);	887	mutex_unlock(&dentry->d_inode->i_mutex);
798	if (ret)	888	if (ret)
799	d_delete(dentry);	889	d_delete(dentry);


diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 5d349d38e056..4921e7426d95 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c
@@ -33,10 +33,15 @@
33	#include <linux/backing-dev.h>	33	#include <linux/backing-dev.h>
34	#include <linux/capability.h>	34	#include <linux/capability.h>
35	#include <linux/sched.h>	35	#include <linux/sched.h>
		36	#include <linux/lockdep.h>
36		37
37	#include <linux/configfs.h>	38	#include <linux/configfs.h>
38	#include "configfs_internal.h"	39	#include "configfs_internal.h"
39		40
		41	#ifdef CONFIG_LOCKDEP
		42	static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
		43	#endif
		44
40	extern struct super_block * configfs_sb;	45	extern struct super_block * configfs_sb;
41		46
42	static const struct address_space_operations configfs_aops = {	47	static const struct address_space_operations configfs_aops = {
@@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
150	return inode;	155	return inode;
151	}	156	}
152		157
		158	#ifdef CONFIG_LOCKDEP
		159
		160	static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
		161	struct inode *inode)
		162	{
		163	int depth = sd->s_depth;
		164
		165	if (depth > 0) {
		166	if (depth <= ARRAY_SIZE(default_group_class)) {
		167	lockdep_set_class(&inode->i_mutex,
		168	&default_group_class[depth - 1]);
		169	} else {
		170	/*
		171	* In practice the maximum level of locking depth is
		172	* already reached. Just inform about possible reasons.
		173	*/
		174	printk(KERN_INFO "configfs: Too many levels of inodes"
		175	" for the locking correctness validator.\n");
		176	printk(KERN_INFO "Spurious warnings may appear.\n");
		177	}
		178	}
		179	}
		180
		181	#else /* CONFIG_LOCKDEP */
		182
		183	static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
		184	struct inode *inode)
		185	{
		186	}
		187
		188	#endif /* CONFIG_LOCKDEP */
		189
153	int configfs_create(struct dentry * dentry, int mode, int (init)(struct inode ))	190	int configfs_create(struct dentry * dentry, int mode, int (init)(struct inode ))
154	{	191	{
155	int error = 0;	192	int error = 0;
@@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (init)(struct inode
162	struct inode *p_inode = dentry->d_parent->d_inode;	199	struct inode *p_inode = dentry->d_parent->d_inode;
163	p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;	200	p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
164	}	201	}
		202	configfs_set_inode_lock_class(sd, inode);
165	goto Proceed;	203	goto Proceed;
166	}	204	}
167	else	205	else