aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-08-01 14:54:05 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-08-01 14:54:05 -0400
commit63a16f90167850010864a9e8ebb71d216983090f (patch)
treec4b284cc596421ac8100e0ad3b2f56ead4563d2d /fs
parent5adf2b03d97111c8955495ba11e8b7db27df8695 (diff)
parentc259ae52e204d42f8b2d484c85517a4c367030e1 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: [PATCH] ocfs2: Release mutex in error handling code [PATCH] ocfs2: Fix oops when racing files truncates with writes into an mmap region [PATCH 2/2] ocfs2: Fix race between mount and recovery [PATCH 1/2] ocfs2: Add counter in struct ocfs2_dinode to track journal replays [PATCH] configfs: Convenience macros for attribute definition. [PATCH] configfs: Pin configfs subsystems separately from new config_items. [PATCH] configfs: Fix open directory making rmdir() fail [PATCH] configfs: Lock new directory inodes before removing on cleanup after failure [PATCH] configfs: Prevent userspace from creating new entries under attaching directories [PATCH] configfs: Fix failing symlink() making rmdir() fail [PATCH] configfs: Fix symlink() to a removing item [PATCH] configfs: Include linux/err.h in linux/configfs.h
Diffstat (limited to 'fs')
-rw-r--r--fs/configfs/configfs_internal.h3
-rw-r--r--fs/configfs/dir.c210
-rw-r--r--fs/configfs/symlink.c26
-rw-r--r--fs/ocfs2/aops.c29
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/journal.c173
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/ocfs2_fs.h5
-rw-r--r--fs/ocfs2/super.c12
10 files changed, 367 insertions, 98 deletions
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3ea..762d287123ca 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
49#define CONFIGFS_USET_DEFAULT 0x0080 49#define CONFIGFS_USET_DEFAULT 0x0080
50#define CONFIGFS_USET_DROPPING 0x0100 50#define CONFIGFS_USET_DROPPING 0x0100
51#define CONFIGFS_USET_IN_MKDIR 0x0200 51#define CONFIGFS_USET_IN_MKDIR 0x0200
52#define CONFIGFS_USET_CREATING 0x0400
52#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) 53#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
53 54
55extern struct mutex configfs_symlink_mutex;
54extern spinlock_t configfs_dirent_lock; 56extern spinlock_t configfs_dirent_lock;
55 57
56extern struct vfsmount * configfs_mount; 58extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
66extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 68extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
67extern int configfs_make_dirent(struct configfs_dirent *, 69extern int configfs_make_dirent(struct configfs_dirent *,
68 struct dentry *, void *, umode_t, int); 70 struct dentry *, void *, umode_t, int);
71extern int configfs_dirent_is_ready(struct configfs_dirent *);
69 72
70extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); 73extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
71extern void configfs_hash_and_remove(struct dentry * dir, const char * name); 74extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063a..7a8db78a91d2 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
186 if (!error) 186 if (!error)
187 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 187 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
188 CONFIGFS_DIR); 188 CONFIGFS_DIR | CONFIGFS_USET_CREATING);
189 if (!error) { 189 if (!error) {
190 error = configfs_create(d, mode, init_dir); 190 error = configfs_create(d, mode, init_dir);
191 if (!error) { 191 if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
209 * configfs_create_dir - create a directory for an config_item. 209 * configfs_create_dir - create a directory for an config_item.
210 * @item: config_itemwe're creating directory for. 210 * @item: config_itemwe're creating directory for.
211 * @dentry: config_item's dentry. 211 * @dentry: config_item's dentry.
212 *
213 * Note: user-created entries won't be allowed under this new directory
214 * until it is validated by configfs_dir_set_ready()
212 */ 215 */
213 216
214static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 217static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
231 return error; 234 return error;
232} 235}
233 236
237/*
238 * Allow userspace to create new entries under a new directory created with
239 * configfs_create_dir(), and under all of its chidlren directories recursively.
240 * @sd configfs_dirent of the new directory to validate
241 *
242 * Caller must hold configfs_dirent_lock.
243 */
244static void configfs_dir_set_ready(struct configfs_dirent *sd)
245{
246 struct configfs_dirent *child_sd;
247
248 sd->s_type &= ~CONFIGFS_USET_CREATING;
249 list_for_each_entry(child_sd, &sd->s_children, s_sibling)
250 if (child_sd->s_type & CONFIGFS_USET_CREATING)
251 configfs_dir_set_ready(child_sd);
252}
253
254/*
255 * Check that a directory does not belong to a directory hierarchy being
256 * attached and not validated yet.
257 * @sd configfs_dirent of the directory to check
258 *
259 * @return non-zero iff the directory was validated
260 *
261 * Note: takes configfs_dirent_lock, so the result may change from false to true
262 * in two consecutive calls, but never from true to false.
263 */
264int configfs_dirent_is_ready(struct configfs_dirent *sd)
265{
266 int ret;
267
268 spin_lock(&configfs_dirent_lock);
269 ret = !(sd->s_type & CONFIGFS_USET_CREATING);
270 spin_unlock(&configfs_dirent_lock);
271
272 return ret;
273}
274
234int configfs_create_link(struct configfs_symlink *sl, 275int configfs_create_link(struct configfs_symlink *sl,
235 struct dentry *parent, 276 struct dentry *parent,
236 struct dentry *dentry) 277 struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
283 * The only thing special about this is that we remove any files in 324 * The only thing special about this is that we remove any files in
284 * the directory before we remove the directory, and we've inlined 325 * the directory before we remove the directory, and we've inlined
285 * what used to be configfs_rmdir() below, instead of calling separately. 326 * what used to be configfs_rmdir() below, instead of calling separately.
327 *
328 * Caller holds the mutex of the item's inode
286 */ 329 */
287 330
288static void configfs_remove_dir(struct config_item * item) 331static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
330 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 373 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
331 struct configfs_dirent * sd; 374 struct configfs_dirent * sd;
332 int found = 0; 375 int found = 0;
333 int err = 0; 376 int err;
377
378 /*
379 * Fake invisibility if dir belongs to a group/default groups hierarchy
380 * being attached
381 *
382 * This forbids userspace to read/write attributes of items which may
383 * not complete their initialization, since the dentries of the
384 * attributes won't be instantiated.
385 */
386 err = -ENOENT;
387 if (!configfs_dirent_is_ready(parent_sd))
388 goto out;
334 389
335 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 390 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
336 if (sd->s_type & CONFIGFS_NOT_PINNED) { 391 if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
353 return simple_lookup(dir, dentry, nd); 408 return simple_lookup(dir, dentry, nd);
354 } 409 }
355 410
411out:
356 return ERR_PTR(err); 412 return ERR_PTR(err);
357} 413}
358 414
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
370 struct configfs_dirent *sd; 426 struct configfs_dirent *sd;
371 int ret; 427 int ret;
372 428
429 /* Mark that we're trying to drop the group */
430 parent_sd->s_type |= CONFIGFS_USET_DROPPING;
431
373 ret = -EBUSY; 432 ret = -EBUSY;
374 if (!list_empty(&parent_sd->s_links)) 433 if (!list_empty(&parent_sd->s_links))
375 goto out; 434 goto out;
376 435
377 ret = 0; 436 ret = 0;
378 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 437 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
379 if (sd->s_type & CONFIGFS_NOT_PINNED) 438 if (!sd->s_element ||
439 (sd->s_type & CONFIGFS_NOT_PINNED))
380 continue; 440 continue;
381 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 441 if (sd->s_type & CONFIGFS_USET_DEFAULT) {
382 /* Abort if racing with mkdir() */ 442 /* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
385 *wait_mutex = &sd->s_dentry->d_inode->i_mutex; 445 *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
386 return -EAGAIN; 446 return -EAGAIN;
387 } 447 }
388 /* Mark that we're trying to drop the group */
389 sd->s_type |= CONFIGFS_USET_DROPPING;
390 448
391 /* 449 /*
392 * Yup, recursive. If there's a problem, blame 450 * Yup, recursive. If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
414 struct configfs_dirent *parent_sd = dentry->d_fsdata; 472 struct configfs_dirent *parent_sd = dentry->d_fsdata;
415 struct configfs_dirent *sd; 473 struct configfs_dirent *sd;
416 474
417 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 475 parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
418 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 476
477 list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
478 if (sd->s_type & CONFIGFS_USET_DEFAULT)
419 configfs_detach_rollback(sd->s_dentry); 479 configfs_detach_rollback(sd->s_dentry);
420 sd->s_type &= ~CONFIGFS_USET_DROPPING;
421 }
422 }
423} 480}
424 481
425static void detach_attrs(struct config_item * item) 482static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
558static int populate_groups(struct config_group *group) 615static int populate_groups(struct config_group *group)
559{ 616{
560 struct config_group *new_group; 617 struct config_group *new_group;
561 struct dentry *dentry = group->cg_item.ci_dentry;
562 int ret = 0; 618 int ret = 0;
563 int i; 619 int i;
564 620
565 if (group->default_groups) { 621 if (group->default_groups) {
566 /*
567 * FYI, we're faking mkdir here
568 * I'm not sure we need this semaphore, as we're called
569 * from our parent's mkdir. That holds our parent's
570 * i_mutex, so afaik lookup cannot continue through our
571 * parent to find us, let alone mess with our tree.
572 * That said, taking our i_mutex is closer to mkdir
573 * emulation, and shouldn't hurt.
574 */
575 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
576
577 for (i = 0; group->default_groups[i]; i++) { 622 for (i = 0; group->default_groups[i]; i++) {
578 new_group = group->default_groups[i]; 623 new_group = group->default_groups[i];
579 624
580 ret = create_default_group(group, new_group); 625 ret = create_default_group(group, new_group);
581 if (ret) 626 if (ret) {
627 detach_groups(group);
582 break; 628 break;
629 }
583 } 630 }
584
585 mutex_unlock(&dentry->d_inode->i_mutex);
586 } 631 }
587 632
588 if (ret)
589 detach_groups(group);
590
591 return ret; 633 return ret;
592} 634}
593 635
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
702 if (!ret) { 744 if (!ret) {
703 ret = populate_attrs(item); 745 ret = populate_attrs(item);
704 if (ret) { 746 if (ret) {
747 /*
748 * We are going to remove an inode and its dentry but
749 * the VFS may already have hit and used them. Thus,
750 * we must lock them as rmdir() would.
751 */
752 mutex_lock(&dentry->d_inode->i_mutex);
705 configfs_remove_dir(item); 753 configfs_remove_dir(item);
754 dentry->d_inode->i_flags |= S_DEAD;
755 mutex_unlock(&dentry->d_inode->i_mutex);
706 d_delete(dentry); 756 d_delete(dentry);
707 } 757 }
708 } 758 }
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
710 return ret; 760 return ret;
711} 761}
712 762
763/* Caller holds the mutex of the item's inode */
713static void configfs_detach_item(struct config_item *item) 764static void configfs_detach_item(struct config_item *item)
714{ 765{
715 detach_attrs(item); 766 detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
728 sd = dentry->d_fsdata; 779 sd = dentry->d_fsdata;
729 sd->s_type |= CONFIGFS_USET_DIR; 780 sd->s_type |= CONFIGFS_USET_DIR;
730 781
782 /*
783 * FYI, we're faking mkdir in populate_groups()
784 * We must lock the group's inode to avoid races with the VFS
785 * which can already hit the inode and try to add/remove entries
786 * under it.
787 *
788 * We must also lock the inode to remove it safely in case of
789 * error, as rmdir() would.
790 */
791 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
731 ret = populate_groups(to_config_group(item)); 792 ret = populate_groups(to_config_group(item));
732 if (ret) { 793 if (ret) {
733 configfs_detach_item(item); 794 configfs_detach_item(item);
734 d_delete(dentry); 795 dentry->d_inode->i_flags |= S_DEAD;
735 } 796 }
797 mutex_unlock(&dentry->d_inode->i_mutex);
798 if (ret)
799 d_delete(dentry);
736 } 800 }
737 801
738 return ret; 802 return ret;
739} 803}
740 804
805/* Caller holds the mutex of the group's inode */
741static void configfs_detach_group(struct config_item *item) 806static void configfs_detach_group(struct config_item *item)
742{ 807{
743 detach_groups(to_config_group(item)); 808 detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1035 struct configfs_subsystem *subsys; 1100 struct configfs_subsystem *subsys;
1036 struct configfs_dirent *sd; 1101 struct configfs_dirent *sd;
1037 struct config_item_type *type; 1102 struct config_item_type *type;
1038 struct module *owner = NULL; 1103 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1039 char *name; 1104 char *name;
1040 1105
1041 if (dentry->d_parent == configfs_sb->s_root) { 1106 if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1044 } 1109 }
1045 1110
1046 sd = dentry->d_parent->d_fsdata; 1111 sd = dentry->d_parent->d_fsdata;
1112
1113 /*
1114 * Fake invisibility if dir belongs to a group/default groups hierarchy
1115 * being attached
1116 */
1117 if (!configfs_dirent_is_ready(sd)) {
1118 ret = -ENOENT;
1119 goto out;
1120 }
1121
1047 if (!(sd->s_type & CONFIGFS_USET_DIR)) { 1122 if (!(sd->s_type & CONFIGFS_USET_DIR)) {
1048 ret = -EPERM; 1123 ret = -EPERM;
1049 goto out; 1124 goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1062 goto out_put; 1137 goto out_put;
1063 } 1138 }
1064 1139
1140 /*
1141 * The subsystem may belong to a different module than the item
1142 * being created. We don't want to safely pin the new item but
1143 * fail to pin the subsystem it sits under.
1144 */
1145 if (!subsys->su_group.cg_item.ci_type) {
1146 ret = -EINVAL;
1147 goto out_put;
1148 }
1149 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1150 if (!try_module_get(subsys_owner)) {
1151 ret = -EINVAL;
1152 goto out_put;
1153 }
1154
1065 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); 1155 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
1066 if (!name) { 1156 if (!name) {
1067 ret = -ENOMEM; 1157 ret = -ENOMEM;
1068 goto out_put; 1158 goto out_subsys_put;
1069 } 1159 }
1070 1160
1071 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 1161 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1094 kfree(name); 1184 kfree(name);
1095 if (ret) { 1185 if (ret) {
1096 /* 1186 /*
1097 * If item == NULL, then link_obj() was never called. 1187 * If ret != 0, then link_obj() was never called.
1098 * There are no extra references to clean up. 1188 * There are no extra references to clean up.
1099 */ 1189 */
1100 goto out_put; 1190 goto out_subsys_put;
1101 } 1191 }
1102 1192
1103 /* 1193 /*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1111 goto out_unlink; 1201 goto out_unlink;
1112 } 1202 }
1113 1203
1114 owner = type->ct_owner; 1204 new_item_owner = type->ct_owner;
1115 if (!try_module_get(owner)) { 1205 if (!try_module_get(new_item_owner)) {
1116 ret = -EINVAL; 1206 ret = -EINVAL;
1117 goto out_unlink; 1207 goto out_unlink;
1118 } 1208 }
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1142 1232
1143 spin_lock(&configfs_dirent_lock); 1233 spin_lock(&configfs_dirent_lock);
1144 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; 1234 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
1235 if (!ret)
1236 configfs_dir_set_ready(dentry->d_fsdata);
1145 spin_unlock(&configfs_dirent_lock); 1237 spin_unlock(&configfs_dirent_lock);
1146 1238
1147out_unlink: 1239out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
1159 mutex_unlock(&subsys->su_mutex); 1251 mutex_unlock(&subsys->su_mutex);
1160 1252
1161 if (module_got) 1253 if (module_got)
1162 module_put(owner); 1254 module_put(new_item_owner);
1163 } 1255 }
1164 1256
1257out_subsys_put:
1258 if (ret)
1259 module_put(subsys_owner);
1260
1165out_put: 1261out_put:
1166 /* 1262 /*
1167 * link_obj()/link_group() took a reference from child->parent, 1263 * link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1180 struct config_item *item; 1276 struct config_item *item;
1181 struct configfs_subsystem *subsys; 1277 struct configfs_subsystem *subsys;
1182 struct configfs_dirent *sd; 1278 struct configfs_dirent *sd;
1183 struct module *owner = NULL; 1279 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1184 int ret; 1280 int ret;
1185 1281
1186 if (dentry->d_parent == configfs_sb->s_root) 1282 if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1207 return -EINVAL; 1303 return -EINVAL;
1208 } 1304 }
1209 1305
1306 /* configfs_mkdir() shouldn't have allowed this */
1307 BUG_ON(!subsys->su_group.cg_item.ci_type);
1308 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1309
1310 /*
1311 * Ensure that no racing symlink() will make detach_prep() fail while
1312 * the new link is temporarily attached
1313 */
1314 mutex_lock(&configfs_symlink_mutex);
1210 spin_lock(&configfs_dirent_lock); 1315 spin_lock(&configfs_dirent_lock);
1211 do { 1316 do {
1212 struct mutex *wait_mutex; 1317 struct mutex *wait_mutex;
@@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1215 if (ret) { 1320 if (ret) {
1216 configfs_detach_rollback(dentry); 1321 configfs_detach_rollback(dentry);
1217 spin_unlock(&configfs_dirent_lock); 1322 spin_unlock(&configfs_dirent_lock);
1323 mutex_unlock(&configfs_symlink_mutex);
1218 if (ret != -EAGAIN) { 1324 if (ret != -EAGAIN) {
1219 config_item_put(parent_item); 1325 config_item_put(parent_item);
1220 return ret; 1326 return ret;
@@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1224 mutex_lock(wait_mutex); 1330 mutex_lock(wait_mutex);
1225 mutex_unlock(wait_mutex); 1331 mutex_unlock(wait_mutex);
1226 1332
1333 mutex_lock(&configfs_symlink_mutex);
1227 spin_lock(&configfs_dirent_lock); 1334 spin_lock(&configfs_dirent_lock);
1228 } 1335 }
1229 } while (ret == -EAGAIN); 1336 } while (ret == -EAGAIN);
1230 spin_unlock(&configfs_dirent_lock); 1337 spin_unlock(&configfs_dirent_lock);
1338 mutex_unlock(&configfs_symlink_mutex);
1231 1339
1232 /* Get a working ref for the duration of this function */ 1340 /* Get a working ref for the duration of this function */
1233 item = configfs_get_config_item(dentry); 1341 item = configfs_get_config_item(dentry);
@@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1236 config_item_put(parent_item); 1344 config_item_put(parent_item);
1237 1345
1238 if (item->ci_type) 1346 if (item->ci_type)
1239 owner = item->ci_type->ct_owner; 1347 dead_item_owner = item->ci_type->ct_owner;
1240 1348
1241 if (sd->s_type & CONFIGFS_USET_DIR) { 1349 if (sd->s_type & CONFIGFS_USET_DIR) {
1242 configfs_detach_group(item); 1350 configfs_detach_group(item);
@@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1258 /* Drop our reference from above */ 1366 /* Drop our reference from above */
1259 config_item_put(item); 1367 config_item_put(item);
1260 1368
1261 module_put(owner); 1369 module_put(dead_item_owner);
1370 module_put(subsys_owner);
1262 1371
1263 return 0; 1372 return 0;
1264} 1373}
@@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
1314{ 1423{
1315 struct dentry * dentry = file->f_path.dentry; 1424 struct dentry * dentry = file->f_path.dentry;
1316 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1425 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1426 int err;
1317 1427
1318 mutex_lock(&dentry->d_inode->i_mutex); 1428 mutex_lock(&dentry->d_inode->i_mutex);
1319 file->private_data = configfs_new_dirent(parent_sd, NULL); 1429 /*
1430 * Fake invisibility if dir belongs to a group/default groups hierarchy
1431 * being attached
1432 */
1433 err = -ENOENT;
1434 if (configfs_dirent_is_ready(parent_sd)) {
1435 file->private_data = configfs_new_dirent(parent_sd, NULL);
1436 if (IS_ERR(file->private_data))
1437 err = PTR_ERR(file->private_data);
1438 else
1439 err = 0;
1440 }
1320 mutex_unlock(&dentry->d_inode->i_mutex); 1441 mutex_unlock(&dentry->d_inode->i_mutex);
1321 1442
1322 return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; 1443 return err;
1323
1324} 1444}
1325 1445
1326static int configfs_dir_close(struct inode *inode, struct file *file) 1446static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1491 if (err) { 1611 if (err) {
1492 d_delete(dentry); 1612 d_delete(dentry);
1493 dput(dentry); 1613 dput(dentry);
1614 } else {
1615 spin_lock(&configfs_dirent_lock);
1616 configfs_dir_set_ready(dentry->d_fsdata);
1617 spin_unlock(&configfs_dirent_lock);
1494 } 1618 }
1495 } 1619 }
1496 1620
@@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1517 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1641 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
1518 I_MUTEX_PARENT); 1642 I_MUTEX_PARENT);
1519 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1643 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1644 mutex_lock(&configfs_symlink_mutex);
1520 spin_lock(&configfs_dirent_lock); 1645 spin_lock(&configfs_dirent_lock);
1521 if (configfs_detach_prep(dentry, NULL)) { 1646 if (configfs_detach_prep(dentry, NULL)) {
1522 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); 1647 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
1523 } 1648 }
1524 spin_unlock(&configfs_dirent_lock); 1649 spin_unlock(&configfs_dirent_lock);
1650 mutex_unlock(&configfs_symlink_mutex);
1525 configfs_detach_group(&group->cg_item); 1651 configfs_detach_group(&group->cg_item);
1526 dentry->d_inode->i_flags |= S_DEAD; 1652 dentry->d_inode->i_flags |= S_DEAD;
1527 mutex_unlock(&dentry->d_inode->i_mutex); 1653 mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40ac..bf74973b0492 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
31#include <linux/configfs.h> 31#include <linux/configfs.h>
32#include "configfs_internal.h" 32#include "configfs_internal.h"
33 33
34/* Protects attachments of new symlinks */
35DEFINE_MUTEX(configfs_symlink_mutex);
36
34static int item_depth(struct config_item * item) 37static int item_depth(struct config_item * item)
35{ 38{
36 struct config_item * p = item; 39 struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
73 struct configfs_symlink *sl; 76 struct configfs_symlink *sl;
74 int ret; 77 int ret;
75 78
79 ret = -ENOENT;
80 if (!configfs_dirent_is_ready(target_sd))
81 goto out;
76 ret = -ENOMEM; 82 ret = -ENOMEM;
77 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); 83 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
78 if (sl) { 84 if (sl) {
79 sl->sl_target = config_item_get(item); 85 sl->sl_target = config_item_get(item);
80 spin_lock(&configfs_dirent_lock); 86 spin_lock(&configfs_dirent_lock);
87 if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
88 spin_unlock(&configfs_dirent_lock);
89 config_item_put(item);
90 kfree(sl);
91 return -ENOENT;
92 }
81 list_add(&sl->sl_list, &target_sd->s_links); 93 list_add(&sl->sl_list, &target_sd->s_links);
82 spin_unlock(&configfs_dirent_lock); 94 spin_unlock(&configfs_dirent_lock);
83 ret = configfs_create_link(sl, parent_item->ci_dentry, 95 ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
91 } 103 }
92 } 104 }
93 105
106out:
94 return ret; 107 return ret;
95} 108}
96 109
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
120{ 133{
121 int ret; 134 int ret;
122 struct nameidata nd; 135 struct nameidata nd;
136 struct configfs_dirent *sd;
123 struct config_item *parent_item; 137 struct config_item *parent_item;
124 struct config_item *target_item; 138 struct config_item *target_item;
125 struct config_item_type *type; 139 struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
128 if (dentry->d_parent == configfs_sb->s_root) 142 if (dentry->d_parent == configfs_sb->s_root)
129 goto out; 143 goto out;
130 144
145 sd = dentry->d_parent->d_fsdata;
146 /*
147 * Fake invisibility if dir belongs to a group/default groups hierarchy
148 * being attached
149 */
150 ret = -ENOENT;
151 if (!configfs_dirent_is_ready(sd))
152 goto out;
153
131 parent_item = configfs_get_config_item(dentry->d_parent); 154 parent_item = configfs_get_config_item(dentry->d_parent);
132 type = parent_item->ci_type; 155 type = parent_item->ci_type;
133 156
157 ret = -EPERM;
134 if (!type || !type->ct_item_ops || 158 if (!type || !type->ct_item_ops ||
135 !type->ct_item_ops->allow_link) 159 !type->ct_item_ops->allow_link)
136 goto out_put; 160 goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 165
142 ret = type->ct_item_ops->allow_link(parent_item, target_item); 166 ret = type->ct_item_ops->allow_link(parent_item, target_item);
143 if (!ret) { 167 if (!ret) {
168 mutex_lock(&configfs_symlink_mutex);
144 ret = create_link(parent_item, target_item, dentry); 169 ret = create_link(parent_item, target_item, dentry);
170 mutex_unlock(&configfs_symlink_mutex);
145 if (ret && type->ct_item_ops->drop_link) 171 if (ret && type->ct_item_ops->drop_link)
146 type->ct_item_ops->drop_link(parent_item, 172 type->ct_item_ops->drop_link(parent_item,
147 target_item); 173 target_item);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6d..506c24fb5078 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
1073 for(i = 0; i < wc->w_num_pages; i++) { 1073 for(i = 0; i < wc->w_num_pages; i++) {
1074 tmppage = wc->w_pages[i]; 1074 tmppage = wc->w_pages[i];
1075 1075
1076 if (ocfs2_should_order_data(inode)) 1076 if (page_has_buffers(tmppage)) {
1077 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1077 if (ocfs2_should_order_data(inode))
1078 from, to, NULL, 1078 walk_page_buffers(wc->w_handle,
1079 ocfs2_journal_dirty_data); 1079 page_buffers(tmppage),
1080 1080 from, to, NULL,
1081 block_commit_write(tmppage, from, to); 1081 ocfs2_journal_dirty_data);
1082
1083 block_commit_write(tmppage, from, to);
1084 }
1082 } 1085 }
1083} 1086}
1084 1087
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1901 to = PAGE_CACHE_SIZE; 1904 to = PAGE_CACHE_SIZE;
1902 } 1905 }
1903 1906
1904 if (ocfs2_should_order_data(inode)) 1907 if (page_has_buffers(tmppage)) {
1905 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1908 if (ocfs2_should_order_data(inode))
1906 from, to, NULL, 1909 walk_page_buffers(wc->w_handle,
1907 ocfs2_journal_dirty_data); 1910 page_buffers(tmppage),
1908 1911 from, to, NULL,
1909 block_commit_write(tmppage, from, to); 1912 ocfs2_journal_dirty_data);
1913 block_commit_write(tmppage, from, to);
1914 }
1910 } 1915 }
1911 1916
1912out_write_size: 1917out_write_size:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index be2dd95d3a1d..ec2ed15c3daa 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1766,8 +1766,8 @@ out_inode_unlock:
1766out_rw_unlock: 1766out_rw_unlock:
1767 ocfs2_rw_unlock(inode, 1); 1767 ocfs2_rw_unlock(inode, 1);
1768 1768
1769 mutex_unlock(&inode->i_mutex);
1770out: 1769out:
1770 mutex_unlock(&inode->i_mutex);
1771 return ret; 1771 return ret;
1772} 1772}
1773 1773
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfdd..7a37240f7a31 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
57static int ocfs2_commit_cache(struct ocfs2_super *osb); 57static int ocfs2_commit_cache(struct ocfs2_super *osb);
58static int ocfs2_wait_on_mount(struct ocfs2_super *osb); 58static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
60 int dirty); 60 int dirty, int replayed);
61static int ocfs2_trylock_journal(struct ocfs2_super *osb, 61static int ocfs2_trylock_journal(struct ocfs2_super *osb,
62 int slot_num); 62 int slot_num);
63static int ocfs2_recover_orphans(struct ocfs2_super *osb, 63static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
562 return status; 562 return status;
563} 563}
564 564
565static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
566{
567 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
568}
569
570static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
571{
572 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
573}
574
565static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 575static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
566 int dirty) 576 int dirty, int replayed)
567{ 577{
568 int status; 578 int status;
569 unsigned int flags; 579 unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
593 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 603 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
594 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 604 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
595 605
606 if (replayed)
607 ocfs2_bump_recovery_generation(fe);
608
596 status = ocfs2_write_block(osb, bh, journal->j_inode); 609 status = ocfs2_write_block(osb, bh, journal->j_inode);
597 if (status < 0) 610 if (status < 0)
598 mlog_errno(status); 611 mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
667 * Do not toggle if flush was unsuccessful otherwise 680 * Do not toggle if flush was unsuccessful otherwise
668 * will leave dirty metadata in a "clean" journal 681 * will leave dirty metadata in a "clean" journal
669 */ 682 */
670 status = ocfs2_journal_toggle_dirty(osb, 0); 683 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
671 if (status < 0) 684 if (status < 0)
672 mlog_errno(status); 685 mlog_errno(status);
673 } 686 }
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
710 } 723 }
711} 724}
712 725
713int ocfs2_journal_load(struct ocfs2_journal *journal, int local) 726int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
714{ 727{
715 int status = 0; 728 int status = 0;
716 struct ocfs2_super *osb; 729 struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
729 742
730 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); 743 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
731 744
732 status = ocfs2_journal_toggle_dirty(osb, 1); 745 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
733 if (status < 0) { 746 if (status < 0) {
734 mlog_errno(status); 747 mlog_errno(status);
735 goto done; 748 goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
771 goto bail; 784 goto bail;
772 } 785 }
773 786
774 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); 787 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
775 if (status < 0) 788 if (status < 0)
776 mlog_errno(status); 789 mlog_errno(status);
777 790
@@ -1034,6 +1047,12 @@ restart:
1034 spin_unlock(&osb->osb_lock); 1047 spin_unlock(&osb->osb_lock);
1035 mlog(0, "All nodes recovered\n"); 1048 mlog(0, "All nodes recovered\n");
1036 1049
1050 /* Refresh all journal recovery generations from disk */
1051 status = ocfs2_check_journals_nolocks(osb);
1052 status = (status == -EROFS) ? 0 : status;
1053 if (status < 0)
1054 mlog_errno(status);
1055
1037 ocfs2_super_unlock(osb, 1); 1056 ocfs2_super_unlock(osb, 1);
1038 1057
1039 /* We always run recovery on our own orphan dir - the dead 1058 /* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
1096 mlog_exit_void(); 1115 mlog_exit_void();
1097} 1116}
1098 1117
1118static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1119 int slot_num,
1120 struct buffer_head **bh,
1121 struct inode **ret_inode)
1122{
1123 int status = -EACCES;
1124 struct inode *inode = NULL;
1125
1126 BUG_ON(slot_num >= osb->max_slots);
1127
1128 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1129 slot_num);
1130 if (!inode || is_bad_inode(inode)) {
1131 mlog_errno(status);
1132 goto bail;
1133 }
1134 SET_INODE_JOURNAL(inode);
1135
1136 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
1137 if (status < 0) {
1138 mlog_errno(status);
1139 goto bail;
1140 }
1141
1142 status = 0;
1143
1144bail:
1145 if (inode) {
1146 if (status || !ret_inode)
1147 iput(inode);
1148 else
1149 *ret_inode = inode;
1150 }
1151 return status;
1152}
1153
1099/* Does the actual journal replay and marks the journal inode as 1154/* Does the actual journal replay and marks the journal inode as
1100 * clean. Will only replay if the journal inode is marked dirty. */ 1155 * clean. Will only replay if the journal inode is marked dirty. */
1101static int ocfs2_replay_journal(struct ocfs2_super *osb, 1156static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1109 struct ocfs2_dinode *fe; 1164 struct ocfs2_dinode *fe;
1110 journal_t *journal = NULL; 1165 journal_t *journal = NULL;
1111 struct buffer_head *bh = NULL; 1166 struct buffer_head *bh = NULL;
1167 u32 slot_reco_gen;
1112 1168
1113 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1169 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1114 slot_num); 1170 if (status) {
1115 if (inode == NULL) {
1116 status = -EACCES;
1117 mlog_errno(status); 1171 mlog_errno(status);
1118 goto done; 1172 goto done;
1119 } 1173 }
1120 if (is_bad_inode(inode)) { 1174
1121 status = -EACCES; 1175 fe = (struct ocfs2_dinode *)bh->b_data;
1122 iput(inode); 1176 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1123 inode = NULL; 1177 brelse(bh);
1124 mlog_errno(status); 1178 bh = NULL;
1179
1180 /*
1181 * As the fs recovery is asynchronous, there is a small chance that
1182 * another node mounted (and recovered) the slot before the recovery
1183 * thread could get the lock. To handle that, we dirty read the journal
1184 * inode for that slot to get the recovery generation. If it is
1185 * different than what we expected, the slot has been recovered.
1186 * If not, it needs recovery.
1187 */
1188 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1189 mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
1190 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1191 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1192 status = -EBUSY;
1125 goto done; 1193 goto done;
1126 } 1194 }
1127 SET_INODE_JOURNAL(inode); 1195
1196 /* Continue with recovery as the journal has not yet been recovered */
1128 1197
1129 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 1198 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1130 if (status < 0) { 1199 if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1138 fe = (struct ocfs2_dinode *) bh->b_data; 1207 fe = (struct ocfs2_dinode *) bh->b_data;
1139 1208
1140 flags = le32_to_cpu(fe->id1.journal1.ij_flags); 1209 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1210 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1141 1211
1142 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 1212 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1143 mlog(0, "No recovery required for node %d\n", node_num); 1213 mlog(0, "No recovery required for node %d\n", node_num);
1214 /* Refresh recovery generation for the slot */
1215 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1144 goto done; 1216 goto done;
1145 } 1217 }
1146 1218
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1188 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 1260 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1189 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 1261 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1190 1262
1263 /* Increment recovery generation to indicate successful recovery */
1264 ocfs2_bump_recovery_generation(fe);
1265 osb->slot_recovery_generations[slot_num] =
1266 ocfs2_get_recovery_generation(fe);
1267
1191 status = ocfs2_write_block(osb, bh, inode); 1268 status = ocfs2_write_block(osb, bh, inode);
1192 if (status < 0) 1269 if (status < 0)
1193 mlog_errno(status); 1270 mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1252 1329
1253 status = ocfs2_replay_journal(osb, node_num, slot_num); 1330 status = ocfs2_replay_journal(osb, node_num, slot_num);
1254 if (status < 0) { 1331 if (status < 0) {
1332 if (status == -EBUSY) {
1333 mlog(0, "Skipping recovery for slot %u (node %u) "
1334 "as another node has recovered it\n", slot_num,
1335 node_num);
1336 status = 0;
1337 goto done;
1338 }
1255 mlog_errno(status); 1339 mlog_errno(status);
1256 goto done; 1340 goto done;
1257 } 1341 }
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1334{ 1418{
1335 unsigned int node_num; 1419 unsigned int node_num;
1336 int status, i; 1420 int status, i;
1421 struct buffer_head *bh = NULL;
1422 struct ocfs2_dinode *di;
1337 1423
1338 /* This is called with the super block cluster lock, so we 1424 /* This is called with the super block cluster lock, so we
1339 * know that the slot map can't change underneath us. */ 1425 * know that the slot map can't change underneath us. */
1340 1426
1341 spin_lock(&osb->osb_lock); 1427 spin_lock(&osb->osb_lock);
1342 for (i = 0; i < osb->max_slots; i++) { 1428 for (i = 0; i < osb->max_slots; i++) {
1429 /* Read journal inode to get the recovery generation */
1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1431 if (status) {
1432 mlog_errno(status);
1433 goto bail;
1434 }
1435 di = (struct ocfs2_dinode *)bh->b_data;
1436 osb->slot_recovery_generations[i] =
1437 ocfs2_get_recovery_generation(di);
1438 brelse(bh);
1439 bh = NULL;
1440
1441 mlog(0, "Slot %u recovery generation is %u\n", i,
1442 osb->slot_recovery_generations[i]);
1443
1343 if (i == osb->slot_num) 1444 if (i == osb->slot_num)
1344 continue; 1445 continue;
1345 1446
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg)
1603 return 0; 1704 return 0;
1604} 1705}
1605 1706
1606/* Look for a dirty journal without taking any cluster locks. Used for 1707/* Reads all the journal inodes without taking any cluster locks. Used
1607 * hard readonly access to determine whether the file system journals 1708 * for hard readonly access to determine whether any journal requires
1608 * require recovery. */ 1709 * recovery. Also used to refresh the recovery generation numbers after
1710 * a journal has been recovered by another node.
1711 */
1609int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) 1712int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
1610{ 1713{
1611 int ret = 0; 1714 int ret = 0;
1612 unsigned int slot; 1715 unsigned int slot;
1613 struct buffer_head *di_bh; 1716 struct buffer_head *di_bh = NULL;
1614 struct ocfs2_dinode *di; 1717 struct ocfs2_dinode *di;
1615 struct inode *journal = NULL; 1718 int journal_dirty = 0;
1616 1719
1617 for(slot = 0; slot < osb->max_slots; slot++) { 1720 for(slot = 0; slot < osb->max_slots; slot++) {
1618 journal = ocfs2_get_system_file_inode(osb, 1721 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
1619 JOURNAL_SYSTEM_INODE, 1722 if (ret) {
1620 slot);
1621 if (!journal || is_bad_inode(journal)) {
1622 ret = -EACCES;
1623 mlog_errno(ret);
1624 goto out;
1625 }
1626
1627 di_bh = NULL;
1628 ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
1629 0, journal);
1630 if (ret < 0) {
1631 mlog_errno(ret); 1723 mlog_errno(ret);
1632 goto out; 1724 goto out;
1633 } 1725 }
1634 1726
1635 di = (struct ocfs2_dinode *) di_bh->b_data; 1727 di = (struct ocfs2_dinode *) di_bh->b_data;
1636 1728
1729 osb->slot_recovery_generations[slot] =
1730 ocfs2_get_recovery_generation(di);
1731
1637 if (le32_to_cpu(di->id1.journal1.ij_flags) & 1732 if (le32_to_cpu(di->id1.journal1.ij_flags) &
1638 OCFS2_JOURNAL_DIRTY_FL) 1733 OCFS2_JOURNAL_DIRTY_FL)
1639 ret = -EROFS; 1734 journal_dirty = 1;
1640 1735
1641 brelse(di_bh); 1736 brelse(di_bh);
1642 if (ret) 1737 di_bh = NULL;
1643 break;
1644 } 1738 }
1645 1739
1646out: 1740out:
1647 if (journal) 1741 if (journal_dirty)
1648 iput(journal); 1742 ret = -EROFS;
1649
1650 return ret; 1743 return ret;
1651} 1744}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532ed..2178ebffa05f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
161void ocfs2_journal_shutdown(struct ocfs2_super *osb); 161void ocfs2_journal_shutdown(struct ocfs2_super *osb);
162int ocfs2_journal_wipe(struct ocfs2_journal *journal, 162int ocfs2_journal_wipe(struct ocfs2_journal *journal,
163 int full); 163 int full);
164int ocfs2_journal_load(struct ocfs2_journal *journal, int local); 164int ocfs2_journal_load(struct ocfs2_journal *journal, int local,
165 int replayed);
165int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); 166int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
166void ocfs2_recovery_thread(struct ocfs2_super *osb, 167void ocfs2_recovery_thread(struct ocfs2_super *osb,
167 int node_num); 168 int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef1..7f625f2b1117 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
204 204
205 struct ocfs2_slot_info *slot_info; 205 struct ocfs2_slot_info *slot_info;
206 206
207 u32 *slot_recovery_generations;
208
207 spinlock_t node_map_lock; 209 spinlock_t node_map_lock;
208 210
209 u64 root_blkno; 211 u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f1945177629..4f619850ccf7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
660 struct { /* Info for journal system 660 struct { /* Info for journal system
661 inodes */ 661 inodes */
662 __le32 ij_flags; /* Mounted, version, etc. */ 662 __le32 ij_flags; /* Mounted, version, etc. */
663 __le32 ij_pad; 663 __le32 ij_recovery_generation; /* Incremented when the
664 journal is recovered
665 after an unclean
666 shutdown */
664 } journal1; 667 } journal1;
665 } id1; /* Inode type dependant 1 */ 668 } id1; /* Inode type dependant 1 */
666/*C0*/ union { 669/*C0*/ union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2560b33889aa..88255d3f52b4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
1442 } 1442 }
1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots); 1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
1444 1444
1445 osb->slot_recovery_generations =
1446 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
1447 GFP_KERNEL);
1448 if (!osb->slot_recovery_generations) {
1449 status = -ENOMEM;
1450 mlog_errno(status);
1451 goto bail;
1452 }
1453
1445 init_waitqueue_head(&osb->osb_wipe_event); 1454 init_waitqueue_head(&osb->osb_wipe_event);
1446 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1455 osb->osb_orphan_wipes = kcalloc(osb->max_slots,
1447 sizeof(*osb->osb_orphan_wipes), 1456 sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
1703 local = ocfs2_mount_local(osb); 1712 local = ocfs2_mount_local(osb);
1704 1713
1705 /* will play back anything left in the journal. */ 1714 /* will play back anything left in the journal. */
1706 status = ocfs2_journal_load(osb->journal, local); 1715 status = ocfs2_journal_load(osb->journal, local, dirty);
1707 if (status < 0) { 1716 if (status < 0) {
1708 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); 1717 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
1709 goto finally; 1718 goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
1768 ocfs2_free_slot_info(osb); 1777 ocfs2_free_slot_info(osb);
1769 1778
1770 kfree(osb->osb_orphan_wipes); 1779 kfree(osb->osb_orphan_wipes);
1780 kfree(osb->slot_recovery_generations);
1771 /* FIXME 1781 /* FIXME
1772 * This belongs in journal shutdown, but because we have to 1782 * This belongs in journal shutdown, but because we have to
1773 * allocate osb->journal at the start of ocfs2_initalize_osb(), 1783 * allocate osb->journal at the start of ocfs2_initalize_osb(),