diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-08-01 14:54:05 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-08-01 14:54:05 -0400 |
commit | 63a16f90167850010864a9e8ebb71d216983090f (patch) | |
tree | c4b284cc596421ac8100e0ad3b2f56ead4563d2d /fs | |
parent | 5adf2b03d97111c8955495ba11e8b7db27df8695 (diff) | |
parent | c259ae52e204d42f8b2d484c85517a4c367030e1 (diff) |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2:
[PATCH] ocfs2: Release mutex in error handling code
[PATCH] ocfs2: Fix oops when racing files truncates with writes into an mmap region
[PATCH 2/2] ocfs2: Fix race between mount and recovery
[PATCH 1/2] ocfs2: Add counter in struct ocfs2_dinode to track journal replays
[PATCH] configfs: Convenience macros for attribute definition.
[PATCH] configfs: Pin configfs subsystems separately from new config_items.
[PATCH] configfs: Fix open directory making rmdir() fail
[PATCH] configfs: Lock new directory inodes before removing on cleanup after failure
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
[PATCH] configfs: Fix failing symlink() making rmdir() fail
[PATCH] configfs: Fix symlink() to a removing item
[PATCH] configfs: Include linux/err.h in linux/configfs.h
Diffstat (limited to 'fs')
-rw-r--r-- | fs/configfs/configfs_internal.h | 3 | ||||
-rw-r--r-- | fs/configfs/dir.c | 210 | ||||
-rw-r--r-- | fs/configfs/symlink.c | 26 | ||||
-rw-r--r-- | fs/ocfs2/aops.c | 29 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 173 | ||||
-rw-r--r-- | fs/ocfs2/journal.h | 3 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 5 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 12 |
10 files changed, 367 insertions, 98 deletions
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index da015c12e3ea..762d287123ca 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h | |||
@@ -49,8 +49,10 @@ struct configfs_dirent { | |||
49 | #define CONFIGFS_USET_DEFAULT 0x0080 | 49 | #define CONFIGFS_USET_DEFAULT 0x0080 |
50 | #define CONFIGFS_USET_DROPPING 0x0100 | 50 | #define CONFIGFS_USET_DROPPING 0x0100 |
51 | #define CONFIGFS_USET_IN_MKDIR 0x0200 | 51 | #define CONFIGFS_USET_IN_MKDIR 0x0200 |
52 | #define CONFIGFS_USET_CREATING 0x0400 | ||
52 | #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) | 53 | #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) |
53 | 54 | ||
55 | extern struct mutex configfs_symlink_mutex; | ||
54 | extern spinlock_t configfs_dirent_lock; | 56 | extern spinlock_t configfs_dirent_lock; |
55 | 57 | ||
56 | extern struct vfsmount * configfs_mount; | 58 | extern struct vfsmount * configfs_mount; |
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void); | |||
66 | extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); | 68 | extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); |
67 | extern int configfs_make_dirent(struct configfs_dirent *, | 69 | extern int configfs_make_dirent(struct configfs_dirent *, |
68 | struct dentry *, void *, umode_t, int); | 70 | struct dentry *, void *, umode_t, int); |
71 | extern int configfs_dirent_is_ready(struct configfs_dirent *); | ||
69 | 72 | ||
70 | extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); | 73 | extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); |
71 | extern void configfs_hash_and_remove(struct dentry * dir, const char * name); | 74 | extern void configfs_hash_and_remove(struct dentry * dir, const char * name); |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 179589be063a..7a8db78a91d2 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p, | |||
185 | error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); | 185 | error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); |
186 | if (!error) | 186 | if (!error) |
187 | error = configfs_make_dirent(p->d_fsdata, d, k, mode, | 187 | error = configfs_make_dirent(p->d_fsdata, d, k, mode, |
188 | CONFIGFS_DIR); | 188 | CONFIGFS_DIR | CONFIGFS_USET_CREATING); |
189 | if (!error) { | 189 | if (!error) { |
190 | error = configfs_create(d, mode, init_dir); | 190 | error = configfs_create(d, mode, init_dir); |
191 | if (!error) { | 191 | if (!error) { |
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p, | |||
209 | * configfs_create_dir - create a directory for a config_item. | 209 | * configfs_create_dir - create a directory for a config_item. |
210 | * @item: config_item we're creating directory for. | 210 | * @item: config_item we're creating directory for. |
211 | * @dentry: config_item's dentry. | 211 | * @dentry: config_item's dentry. |
212 | * | ||
213 | * Note: user-created entries won't be allowed under this new directory | ||
214 | * until it is validated by configfs_dir_set_ready() | ||
212 | */ | 215 | */ |
213 | 216 | ||
214 | static int configfs_create_dir(struct config_item * item, struct dentry *dentry) | 217 | static int configfs_create_dir(struct config_item * item, struct dentry *dentry) |
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry) | |||
231 | return error; | 234 | return error; |
232 | } | 235 | } |
233 | 236 | ||
237 | /* | ||
238 | * Allow userspace to create new entries under a new directory created with | ||
239 | * configfs_create_dir(), and under all of its children directories recursively. | ||
240 | * @sd configfs_dirent of the new directory to validate | ||
241 | * | ||
242 | * Caller must hold configfs_dirent_lock. | ||
243 | */ | ||
244 | static void configfs_dir_set_ready(struct configfs_dirent *sd) | ||
245 | { | ||
246 | struct configfs_dirent *child_sd; | ||
247 | |||
248 | sd->s_type &= ~CONFIGFS_USET_CREATING; | ||
249 | list_for_each_entry(child_sd, &sd->s_children, s_sibling) | ||
250 | if (child_sd->s_type & CONFIGFS_USET_CREATING) | ||
251 | configfs_dir_set_ready(child_sd); | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Check that a directory does not belong to a directory hierarchy being | ||
256 | * attached and not validated yet. | ||
257 | * @sd configfs_dirent of the directory to check | ||
258 | * | ||
259 | * @return non-zero iff the directory was validated | ||
260 | * | ||
261 | * Note: takes configfs_dirent_lock, so the result may change from false to true | ||
262 | * in two consecutive calls, but never from true to false. | ||
263 | */ | ||
264 | int configfs_dirent_is_ready(struct configfs_dirent *sd) | ||
265 | { | ||
266 | int ret; | ||
267 | |||
268 | spin_lock(&configfs_dirent_lock); | ||
269 | ret = !(sd->s_type & CONFIGFS_USET_CREATING); | ||
270 | spin_unlock(&configfs_dirent_lock); | ||
271 | |||
272 | return ret; | ||
273 | } | ||
274 | |||
234 | int configfs_create_link(struct configfs_symlink *sl, | 275 | int configfs_create_link(struct configfs_symlink *sl, |
235 | struct dentry *parent, | 276 | struct dentry *parent, |
236 | struct dentry *dentry) | 277 | struct dentry *dentry) |
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d) | |||
283 | * The only thing special about this is that we remove any files in | 324 | * The only thing special about this is that we remove any files in |
284 | * the directory before we remove the directory, and we've inlined | 325 | * the directory before we remove the directory, and we've inlined |
285 | * what used to be configfs_rmdir() below, instead of calling separately. | 326 | * what used to be configfs_rmdir() below, instead of calling separately. |
327 | * | ||
328 | * Caller holds the mutex of the item's inode | ||
286 | */ | 329 | */ |
287 | 330 | ||
288 | static void configfs_remove_dir(struct config_item * item) | 331 | static void configfs_remove_dir(struct config_item * item) |
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir, | |||
330 | struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; | 373 | struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; |
331 | struct configfs_dirent * sd; | 374 | struct configfs_dirent * sd; |
332 | int found = 0; | 375 | int found = 0; |
333 | int err = 0; | 376 | int err; |
377 | |||
378 | /* | ||
379 | * Fake invisibility if dir belongs to a group/default groups hierarchy | ||
380 | * being attached | ||
381 | * | ||
382 | * This forbids userspace to read/write attributes of items which may | ||
383 | * not complete their initialization, since the dentries of the | ||
384 | * attributes won't be instantiated. | ||
385 | */ | ||
386 | err = -ENOENT; | ||
387 | if (!configfs_dirent_is_ready(parent_sd)) | ||
388 | goto out; | ||
334 | 389 | ||
335 | list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { | 390 | list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { |
336 | if (sd->s_type & CONFIGFS_NOT_PINNED) { | 391 | if (sd->s_type & CONFIGFS_NOT_PINNED) { |
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir, | |||
353 | return simple_lookup(dir, dentry, nd); | 408 | return simple_lookup(dir, dentry, nd); |
354 | } | 409 | } |
355 | 410 | ||
411 | out: | ||
356 | return ERR_PTR(err); | 412 | return ERR_PTR(err); |
357 | } | 413 | } |
358 | 414 | ||
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex | |||
370 | struct configfs_dirent *sd; | 426 | struct configfs_dirent *sd; |
371 | int ret; | 427 | int ret; |
372 | 428 | ||
429 | /* Mark that we're trying to drop the group */ | ||
430 | parent_sd->s_type |= CONFIGFS_USET_DROPPING; | ||
431 | |||
373 | ret = -EBUSY; | 432 | ret = -EBUSY; |
374 | if (!list_empty(&parent_sd->s_links)) | 433 | if (!list_empty(&parent_sd->s_links)) |
375 | goto out; | 434 | goto out; |
376 | 435 | ||
377 | ret = 0; | 436 | ret = 0; |
378 | list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { | 437 | list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { |
379 | if (sd->s_type & CONFIGFS_NOT_PINNED) | 438 | if (!sd->s_element || |
439 | (sd->s_type & CONFIGFS_NOT_PINNED)) | ||
380 | continue; | 440 | continue; |
381 | if (sd->s_type & CONFIGFS_USET_DEFAULT) { | 441 | if (sd->s_type & CONFIGFS_USET_DEFAULT) { |
382 | /* Abort if racing with mkdir() */ | 442 | /* Abort if racing with mkdir() */ |
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex | |||
385 | *wait_mutex = &sd->s_dentry->d_inode->i_mutex; | 445 | *wait_mutex = &sd->s_dentry->d_inode->i_mutex; |
386 | return -EAGAIN; | 446 | return -EAGAIN; |
387 | } | 447 | } |
388 | /* Mark that we're trying to drop the group */ | ||
389 | sd->s_type |= CONFIGFS_USET_DROPPING; | ||
390 | 448 | ||
391 | /* | 449 | /* |
392 | * Yup, recursive. If there's a problem, blame | 450 | * Yup, recursive. If there's a problem, blame |
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry) | |||
414 | struct configfs_dirent *parent_sd = dentry->d_fsdata; | 472 | struct configfs_dirent *parent_sd = dentry->d_fsdata; |
415 | struct configfs_dirent *sd; | 473 | struct configfs_dirent *sd; |
416 | 474 | ||
417 | list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { | 475 | parent_sd->s_type &= ~CONFIGFS_USET_DROPPING; |
418 | if (sd->s_type & CONFIGFS_USET_DEFAULT) { | 476 | |
477 | list_for_each_entry(sd, &parent_sd->s_children, s_sibling) | ||
478 | if (sd->s_type & CONFIGFS_USET_DEFAULT) | ||
419 | configfs_detach_rollback(sd->s_dentry); | 479 | configfs_detach_rollback(sd->s_dentry); |
420 | sd->s_type &= ~CONFIGFS_USET_DROPPING; | ||
421 | } | ||
422 | } | ||
423 | } | 480 | } |
424 | 481 | ||
425 | static void detach_attrs(struct config_item * item) | 482 | static void detach_attrs(struct config_item * item) |
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group, | |||
558 | static int populate_groups(struct config_group *group) | 615 | static int populate_groups(struct config_group *group) |
559 | { | 616 | { |
560 | struct config_group *new_group; | 617 | struct config_group *new_group; |
561 | struct dentry *dentry = group->cg_item.ci_dentry; | ||
562 | int ret = 0; | 618 | int ret = 0; |
563 | int i; | 619 | int i; |
564 | 620 | ||
565 | if (group->default_groups) { | 621 | if (group->default_groups) { |
566 | /* | ||
567 | * FYI, we're faking mkdir here | ||
568 | * I'm not sure we need this semaphore, as we're called | ||
569 | * from our parent's mkdir. That holds our parent's | ||
570 | * i_mutex, so afaik lookup cannot continue through our | ||
571 | * parent to find us, let alone mess with our tree. | ||
572 | * That said, taking our i_mutex is closer to mkdir | ||
573 | * emulation, and shouldn't hurt. | ||
574 | */ | ||
575 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); | ||
576 | |||
577 | for (i = 0; group->default_groups[i]; i++) { | 622 | for (i = 0; group->default_groups[i]; i++) { |
578 | new_group = group->default_groups[i]; | 623 | new_group = group->default_groups[i]; |
579 | 624 | ||
580 | ret = create_default_group(group, new_group); | 625 | ret = create_default_group(group, new_group); |
581 | if (ret) | 626 | if (ret) { |
627 | detach_groups(group); | ||
582 | break; | 628 | break; |
629 | } | ||
583 | } | 630 | } |
584 | |||
585 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
586 | } | 631 | } |
587 | 632 | ||
588 | if (ret) | ||
589 | detach_groups(group); | ||
590 | |||
591 | return ret; | 633 | return ret; |
592 | } | 634 | } |
593 | 635 | ||
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item, | |||
702 | if (!ret) { | 744 | if (!ret) { |
703 | ret = populate_attrs(item); | 745 | ret = populate_attrs(item); |
704 | if (ret) { | 746 | if (ret) { |
747 | /* | ||
748 | * We are going to remove an inode and its dentry but | ||
749 | * the VFS may already have hit and used them. Thus, | ||
750 | * we must lock them as rmdir() would. | ||
751 | */ | ||
752 | mutex_lock(&dentry->d_inode->i_mutex); | ||
705 | configfs_remove_dir(item); | 753 | configfs_remove_dir(item); |
754 | dentry->d_inode->i_flags |= S_DEAD; | ||
755 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
706 | d_delete(dentry); | 756 | d_delete(dentry); |
707 | } | 757 | } |
708 | } | 758 | } |
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item, | |||
710 | return ret; | 760 | return ret; |
711 | } | 761 | } |
712 | 762 | ||
763 | /* Caller holds the mutex of the item's inode */ | ||
713 | static void configfs_detach_item(struct config_item *item) | 764 | static void configfs_detach_item(struct config_item *item) |
714 | { | 765 | { |
715 | detach_attrs(item); | 766 | detach_attrs(item); |
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item, | |||
728 | sd = dentry->d_fsdata; | 779 | sd = dentry->d_fsdata; |
729 | sd->s_type |= CONFIGFS_USET_DIR; | 780 | sd->s_type |= CONFIGFS_USET_DIR; |
730 | 781 | ||
782 | /* | ||
783 | * FYI, we're faking mkdir in populate_groups() | ||
784 | * We must lock the group's inode to avoid races with the VFS | ||
785 | * which can already hit the inode and try to add/remove entries | ||
786 | * under it. | ||
787 | * | ||
788 | * We must also lock the inode to remove it safely in case of | ||
789 | * error, as rmdir() would. | ||
790 | */ | ||
791 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); | ||
731 | ret = populate_groups(to_config_group(item)); | 792 | ret = populate_groups(to_config_group(item)); |
732 | if (ret) { | 793 | if (ret) { |
733 | configfs_detach_item(item); | 794 | configfs_detach_item(item); |
734 | d_delete(dentry); | 795 | dentry->d_inode->i_flags |= S_DEAD; |
735 | } | 796 | } |
797 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
798 | if (ret) | ||
799 | d_delete(dentry); | ||
736 | } | 800 | } |
737 | 801 | ||
738 | return ret; | 802 | return ret; |
739 | } | 803 | } |
740 | 804 | ||
805 | /* Caller holds the mutex of the group's inode */ | ||
741 | static void configfs_detach_group(struct config_item *item) | 806 | static void configfs_detach_group(struct config_item *item) |
742 | { | 807 | { |
743 | detach_groups(to_config_group(item)); | 808 | detach_groups(to_config_group(item)); |
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1035 | struct configfs_subsystem *subsys; | 1100 | struct configfs_subsystem *subsys; |
1036 | struct configfs_dirent *sd; | 1101 | struct configfs_dirent *sd; |
1037 | struct config_item_type *type; | 1102 | struct config_item_type *type; |
1038 | struct module *owner = NULL; | 1103 | struct module *subsys_owner = NULL, *new_item_owner = NULL; |
1039 | char *name; | 1104 | char *name; |
1040 | 1105 | ||
1041 | if (dentry->d_parent == configfs_sb->s_root) { | 1106 | if (dentry->d_parent == configfs_sb->s_root) { |
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1044 | } | 1109 | } |
1045 | 1110 | ||
1046 | sd = dentry->d_parent->d_fsdata; | 1111 | sd = dentry->d_parent->d_fsdata; |
1112 | |||
1113 | /* | ||
1114 | * Fake invisibility if dir belongs to a group/default groups hierarchy | ||
1115 | * being attached | ||
1116 | */ | ||
1117 | if (!configfs_dirent_is_ready(sd)) { | ||
1118 | ret = -ENOENT; | ||
1119 | goto out; | ||
1120 | } | ||
1121 | |||
1047 | if (!(sd->s_type & CONFIGFS_USET_DIR)) { | 1122 | if (!(sd->s_type & CONFIGFS_USET_DIR)) { |
1048 | ret = -EPERM; | 1123 | ret = -EPERM; |
1049 | goto out; | 1124 | goto out; |
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1062 | goto out_put; | 1137 | goto out_put; |
1063 | } | 1138 | } |
1064 | 1139 | ||
1140 | /* | ||
1141 | * The subsystem may belong to a different module than the item | ||
1142 | * being created. We don't want to safely pin the new item but | ||
1143 | * fail to pin the subsystem it sits under. | ||
1144 | */ | ||
1145 | if (!subsys->su_group.cg_item.ci_type) { | ||
1146 | ret = -EINVAL; | ||
1147 | goto out_put; | ||
1148 | } | ||
1149 | subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; | ||
1150 | if (!try_module_get(subsys_owner)) { | ||
1151 | ret = -EINVAL; | ||
1152 | goto out_put; | ||
1153 | } | ||
1154 | |||
1065 | name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); | 1155 | name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); |
1066 | if (!name) { | 1156 | if (!name) { |
1067 | ret = -ENOMEM; | 1157 | ret = -ENOMEM; |
1068 | goto out_put; | 1158 | goto out_subsys_put; |
1069 | } | 1159 | } |
1070 | 1160 | ||
1071 | snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); | 1161 | snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); |
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1094 | kfree(name); | 1184 | kfree(name); |
1095 | if (ret) { | 1185 | if (ret) { |
1096 | /* | 1186 | /* |
1097 | * If item == NULL, then link_obj() was never called. | 1187 | * If ret != 0, then link_obj() was never called. |
1098 | * There are no extra references to clean up. | 1188 | * There are no extra references to clean up. |
1099 | */ | 1189 | */ |
1100 | goto out_put; | 1190 | goto out_subsys_put; |
1101 | } | 1191 | } |
1102 | 1192 | ||
1103 | /* | 1193 | /* |
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1111 | goto out_unlink; | 1201 | goto out_unlink; |
1112 | } | 1202 | } |
1113 | 1203 | ||
1114 | owner = type->ct_owner; | 1204 | new_item_owner = type->ct_owner; |
1115 | if (!try_module_get(owner)) { | 1205 | if (!try_module_get(new_item_owner)) { |
1116 | ret = -EINVAL; | 1206 | ret = -EINVAL; |
1117 | goto out_unlink; | 1207 | goto out_unlink; |
1118 | } | 1208 | } |
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
1142 | 1232 | ||
1143 | spin_lock(&configfs_dirent_lock); | 1233 | spin_lock(&configfs_dirent_lock); |
1144 | sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; | 1234 | sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; |
1235 | if (!ret) | ||
1236 | configfs_dir_set_ready(dentry->d_fsdata); | ||
1145 | spin_unlock(&configfs_dirent_lock); | 1237 | spin_unlock(&configfs_dirent_lock); |
1146 | 1238 | ||
1147 | out_unlink: | 1239 | out_unlink: |
@@ -1159,9 +1251,13 @@ out_unlink: | |||
1159 | mutex_unlock(&subsys->su_mutex); | 1251 | mutex_unlock(&subsys->su_mutex); |
1160 | 1252 | ||
1161 | if (module_got) | 1253 | if (module_got) |
1162 | module_put(owner); | 1254 | module_put(new_item_owner); |
1163 | } | 1255 | } |
1164 | 1256 | ||
1257 | out_subsys_put: | ||
1258 | if (ret) | ||
1259 | module_put(subsys_owner); | ||
1260 | |||
1165 | out_put: | 1261 | out_put: |
1166 | /* | 1262 | /* |
1167 | * link_obj()/link_group() took a reference from child->parent, | 1263 | * link_obj()/link_group() took a reference from child->parent, |
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1180 | struct config_item *item; | 1276 | struct config_item *item; |
1181 | struct configfs_subsystem *subsys; | 1277 | struct configfs_subsystem *subsys; |
1182 | struct configfs_dirent *sd; | 1278 | struct configfs_dirent *sd; |
1183 | struct module *owner = NULL; | 1279 | struct module *subsys_owner = NULL, *dead_item_owner = NULL; |
1184 | int ret; | 1280 | int ret; |
1185 | 1281 | ||
1186 | if (dentry->d_parent == configfs_sb->s_root) | 1282 | if (dentry->d_parent == configfs_sb->s_root) |
@@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1207 | return -EINVAL; | 1303 | return -EINVAL; |
1208 | } | 1304 | } |
1209 | 1305 | ||
1306 | /* configfs_mkdir() shouldn't have allowed this */ | ||
1307 | BUG_ON(!subsys->su_group.cg_item.ci_type); | ||
1308 | subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; | ||
1309 | |||
1310 | /* | ||
1311 | * Ensure that no racing symlink() will make detach_prep() fail while | ||
1312 | * the new link is temporarily attached | ||
1313 | */ | ||
1314 | mutex_lock(&configfs_symlink_mutex); | ||
1210 | spin_lock(&configfs_dirent_lock); | 1315 | spin_lock(&configfs_dirent_lock); |
1211 | do { | 1316 | do { |
1212 | struct mutex *wait_mutex; | 1317 | struct mutex *wait_mutex; |
@@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1215 | if (ret) { | 1320 | if (ret) { |
1216 | configfs_detach_rollback(dentry); | 1321 | configfs_detach_rollback(dentry); |
1217 | spin_unlock(&configfs_dirent_lock); | 1322 | spin_unlock(&configfs_dirent_lock); |
1323 | mutex_unlock(&configfs_symlink_mutex); | ||
1218 | if (ret != -EAGAIN) { | 1324 | if (ret != -EAGAIN) { |
1219 | config_item_put(parent_item); | 1325 | config_item_put(parent_item); |
1220 | return ret; | 1326 | return ret; |
@@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1224 | mutex_lock(wait_mutex); | 1330 | mutex_lock(wait_mutex); |
1225 | mutex_unlock(wait_mutex); | 1331 | mutex_unlock(wait_mutex); |
1226 | 1332 | ||
1333 | mutex_lock(&configfs_symlink_mutex); | ||
1227 | spin_lock(&configfs_dirent_lock); | 1334 | spin_lock(&configfs_dirent_lock); |
1228 | } | 1335 | } |
1229 | } while (ret == -EAGAIN); | 1336 | } while (ret == -EAGAIN); |
1230 | spin_unlock(&configfs_dirent_lock); | 1337 | spin_unlock(&configfs_dirent_lock); |
1338 | mutex_unlock(&configfs_symlink_mutex); | ||
1231 | 1339 | ||
1232 | /* Get a working ref for the duration of this function */ | 1340 | /* Get a working ref for the duration of this function */ |
1233 | item = configfs_get_config_item(dentry); | 1341 | item = configfs_get_config_item(dentry); |
@@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1236 | config_item_put(parent_item); | 1344 | config_item_put(parent_item); |
1237 | 1345 | ||
1238 | if (item->ci_type) | 1346 | if (item->ci_type) |
1239 | owner = item->ci_type->ct_owner; | 1347 | dead_item_owner = item->ci_type->ct_owner; |
1240 | 1348 | ||
1241 | if (sd->s_type & CONFIGFS_USET_DIR) { | 1349 | if (sd->s_type & CONFIGFS_USET_DIR) { |
1242 | configfs_detach_group(item); | 1350 | configfs_detach_group(item); |
@@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1258 | /* Drop our reference from above */ | 1366 | /* Drop our reference from above */ |
1259 | config_item_put(item); | 1367 | config_item_put(item); |
1260 | 1368 | ||
1261 | module_put(owner); | 1369 | module_put(dead_item_owner); |
1370 | module_put(subsys_owner); | ||
1262 | 1371 | ||
1263 | return 0; | 1372 | return 0; |
1264 | } | 1373 | } |
@@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file) | |||
1314 | { | 1423 | { |
1315 | struct dentry * dentry = file->f_path.dentry; | 1424 | struct dentry * dentry = file->f_path.dentry; |
1316 | struct configfs_dirent * parent_sd = dentry->d_fsdata; | 1425 | struct configfs_dirent * parent_sd = dentry->d_fsdata; |
1426 | int err; | ||
1317 | 1427 | ||
1318 | mutex_lock(&dentry->d_inode->i_mutex); | 1428 | mutex_lock(&dentry->d_inode->i_mutex); |
1319 | file->private_data = configfs_new_dirent(parent_sd, NULL); | 1429 | /* |
1430 | * Fake invisibility if dir belongs to a group/default groups hierarchy | ||
1431 | * being attached | ||
1432 | */ | ||
1433 | err = -ENOENT; | ||
1434 | if (configfs_dirent_is_ready(parent_sd)) { | ||
1435 | file->private_data = configfs_new_dirent(parent_sd, NULL); | ||
1436 | if (IS_ERR(file->private_data)) | ||
1437 | err = PTR_ERR(file->private_data); | ||
1438 | else | ||
1439 | err = 0; | ||
1440 | } | ||
1320 | mutex_unlock(&dentry->d_inode->i_mutex); | 1441 | mutex_unlock(&dentry->d_inode->i_mutex); |
1321 | 1442 | ||
1322 | return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; | 1443 | return err; |
1323 | |||
1324 | } | 1444 | } |
1325 | 1445 | ||
1326 | static int configfs_dir_close(struct inode *inode, struct file *file) | 1446 | static int configfs_dir_close(struct inode *inode, struct file *file) |
@@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) | |||
1491 | if (err) { | 1611 | if (err) { |
1492 | d_delete(dentry); | 1612 | d_delete(dentry); |
1493 | dput(dentry); | 1613 | dput(dentry); |
1614 | } else { | ||
1615 | spin_lock(&configfs_dirent_lock); | ||
1616 | configfs_dir_set_ready(dentry->d_fsdata); | ||
1617 | spin_unlock(&configfs_dirent_lock); | ||
1494 | } | 1618 | } |
1495 | } | 1619 | } |
1496 | 1620 | ||
@@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) | |||
1517 | mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, | 1641 | mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, |
1518 | I_MUTEX_PARENT); | 1642 | I_MUTEX_PARENT); |
1519 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); | 1643 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); |
1644 | mutex_lock(&configfs_symlink_mutex); | ||
1520 | spin_lock(&configfs_dirent_lock); | 1645 | spin_lock(&configfs_dirent_lock); |
1521 | if (configfs_detach_prep(dentry, NULL)) { | 1646 | if (configfs_detach_prep(dentry, NULL)) { |
1522 | printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); | 1647 | printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); |
1523 | } | 1648 | } |
1524 | spin_unlock(&configfs_dirent_lock); | 1649 | spin_unlock(&configfs_dirent_lock); |
1650 | mutex_unlock(&configfs_symlink_mutex); | ||
1525 | configfs_detach_group(&group->cg_item); | 1651 | configfs_detach_group(&group->cg_item); |
1526 | dentry->d_inode->i_flags |= S_DEAD; | 1652 | dentry->d_inode->i_flags |= S_DEAD; |
1527 | mutex_unlock(&dentry->d_inode->i_mutex); | 1653 | mutex_unlock(&dentry->d_inode->i_mutex); |
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 0004d18c40ac..bf74973b0492 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c | |||
@@ -31,6 +31,9 @@ | |||
31 | #include <linux/configfs.h> | 31 | #include <linux/configfs.h> |
32 | #include "configfs_internal.h" | 32 | #include "configfs_internal.h" |
33 | 33 | ||
34 | /* Protects attachments of new symlinks */ | ||
35 | DEFINE_MUTEX(configfs_symlink_mutex); | ||
36 | |||
34 | static int item_depth(struct config_item * item) | 37 | static int item_depth(struct config_item * item) |
35 | { | 38 | { |
36 | struct config_item * p = item; | 39 | struct config_item * p = item; |
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item, | |||
73 | struct configfs_symlink *sl; | 76 | struct configfs_symlink *sl; |
74 | int ret; | 77 | int ret; |
75 | 78 | ||
79 | ret = -ENOENT; | ||
80 | if (!configfs_dirent_is_ready(target_sd)) | ||
81 | goto out; | ||
76 | ret = -ENOMEM; | 82 | ret = -ENOMEM; |
77 | sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); | 83 | sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); |
78 | if (sl) { | 84 | if (sl) { |
79 | sl->sl_target = config_item_get(item); | 85 | sl->sl_target = config_item_get(item); |
80 | spin_lock(&configfs_dirent_lock); | 86 | spin_lock(&configfs_dirent_lock); |
87 | if (target_sd->s_type & CONFIGFS_USET_DROPPING) { | ||
88 | spin_unlock(&configfs_dirent_lock); | ||
89 | config_item_put(item); | ||
90 | kfree(sl); | ||
91 | return -ENOENT; | ||
92 | } | ||
81 | list_add(&sl->sl_list, &target_sd->s_links); | 93 | list_add(&sl->sl_list, &target_sd->s_links); |
82 | spin_unlock(&configfs_dirent_lock); | 94 | spin_unlock(&configfs_dirent_lock); |
83 | ret = configfs_create_link(sl, parent_item->ci_dentry, | 95 | ret = configfs_create_link(sl, parent_item->ci_dentry, |
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item, | |||
91 | } | 103 | } |
92 | } | 104 | } |
93 | 105 | ||
106 | out: | ||
94 | return ret; | 107 | return ret; |
95 | } | 108 | } |
96 | 109 | ||
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna | |||
120 | { | 133 | { |
121 | int ret; | 134 | int ret; |
122 | struct nameidata nd; | 135 | struct nameidata nd; |
136 | struct configfs_dirent *sd; | ||
123 | struct config_item *parent_item; | 137 | struct config_item *parent_item; |
124 | struct config_item *target_item; | 138 | struct config_item *target_item; |
125 | struct config_item_type *type; | 139 | struct config_item_type *type; |
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna | |||
128 | if (dentry->d_parent == configfs_sb->s_root) | 142 | if (dentry->d_parent == configfs_sb->s_root) |
129 | goto out; | 143 | goto out; |
130 | 144 | ||
145 | sd = dentry->d_parent->d_fsdata; | ||
146 | /* | ||
147 | * Fake invisibility if dir belongs to a group/default groups hierarchy | ||
148 | * being attached | ||
149 | */ | ||
150 | ret = -ENOENT; | ||
151 | if (!configfs_dirent_is_ready(sd)) | ||
152 | goto out; | ||
153 | |||
131 | parent_item = configfs_get_config_item(dentry->d_parent); | 154 | parent_item = configfs_get_config_item(dentry->d_parent); |
132 | type = parent_item->ci_type; | 155 | type = parent_item->ci_type; |
133 | 156 | ||
157 | ret = -EPERM; | ||
134 | if (!type || !type->ct_item_ops || | 158 | if (!type || !type->ct_item_ops || |
135 | !type->ct_item_ops->allow_link) | 159 | !type->ct_item_ops->allow_link) |
136 | goto out_put; | 160 | goto out_put; |
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna | |||
141 | 165 | ||
142 | ret = type->ct_item_ops->allow_link(parent_item, target_item); | 166 | ret = type->ct_item_ops->allow_link(parent_item, target_item); |
143 | if (!ret) { | 167 | if (!ret) { |
168 | mutex_lock(&configfs_symlink_mutex); | ||
144 | ret = create_link(parent_item, target_item, dentry); | 169 | ret = create_link(parent_item, target_item, dentry); |
170 | mutex_unlock(&configfs_symlink_mutex); | ||
145 | if (ret && type->ct_item_ops->drop_link) | 171 | if (ret && type->ct_item_ops->drop_link) |
146 | type->ct_item_ops->drop_link(parent_item, | 172 | type->ct_item_ops->drop_link(parent_item, |
147 | target_item); | 173 | target_item); |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1db080135c6d..506c24fb5078 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode, | |||
1073 | for(i = 0; i < wc->w_num_pages; i++) { | 1073 | for(i = 0; i < wc->w_num_pages; i++) { |
1074 | tmppage = wc->w_pages[i]; | 1074 | tmppage = wc->w_pages[i]; |
1075 | 1075 | ||
1076 | if (ocfs2_should_order_data(inode)) | 1076 | if (page_has_buffers(tmppage)) { |
1077 | walk_page_buffers(wc->w_handle, page_buffers(tmppage), | 1077 | if (ocfs2_should_order_data(inode)) |
1078 | from, to, NULL, | 1078 | walk_page_buffers(wc->w_handle, |
1079 | ocfs2_journal_dirty_data); | 1079 | page_buffers(tmppage), |
1080 | 1080 | from, to, NULL, | |
1081 | block_commit_write(tmppage, from, to); | 1081 | ocfs2_journal_dirty_data); |
1082 | |||
1083 | block_commit_write(tmppage, from, to); | ||
1084 | } | ||
1082 | } | 1085 | } |
1083 | } | 1086 | } |
1084 | 1087 | ||
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
1901 | to = PAGE_CACHE_SIZE; | 1904 | to = PAGE_CACHE_SIZE; |
1902 | } | 1905 | } |
1903 | 1906 | ||
1904 | if (ocfs2_should_order_data(inode)) | 1907 | if (page_has_buffers(tmppage)) { |
1905 | walk_page_buffers(wc->w_handle, page_buffers(tmppage), | 1908 | if (ocfs2_should_order_data(inode)) |
1906 | from, to, NULL, | 1909 | walk_page_buffers(wc->w_handle, |
1907 | ocfs2_journal_dirty_data); | 1910 | page_buffers(tmppage), |
1908 | 1911 | from, to, NULL, | |
1909 | block_commit_write(tmppage, from, to); | 1912 | ocfs2_journal_dirty_data); |
1913 | block_commit_write(tmppage, from, to); | ||
1914 | } | ||
1910 | } | 1915 | } |
1911 | 1916 | ||
1912 | out_write_size: | 1917 | out_write_size: |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index be2dd95d3a1d..ec2ed15c3daa 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1766,8 +1766,8 @@ out_inode_unlock: | |||
1766 | out_rw_unlock: | 1766 | out_rw_unlock: |
1767 | ocfs2_rw_unlock(inode, 1); | 1767 | ocfs2_rw_unlock(inode, 1); |
1768 | 1768 | ||
1769 | mutex_unlock(&inode->i_mutex); | ||
1770 | out: | 1769 | out: |
1770 | mutex_unlock(&inode->i_mutex); | ||
1771 | return ret; | 1771 | return ret; |
1772 | } | 1772 | } |
1773 | 1773 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a8c19cb3cfdd..7a37240f7a31 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg); | |||
57 | static int ocfs2_commit_cache(struct ocfs2_super *osb); | 57 | static int ocfs2_commit_cache(struct ocfs2_super *osb); |
58 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb); | 58 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb); |
59 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | 59 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, |
60 | int dirty); | 60 | int dirty, int replayed); |
61 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, | 61 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, |
62 | int slot_num); | 62 | int slot_num); |
63 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 63 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, |
@@ -562,8 +562,18 @@ done: | |||
562 | return status; | 562 | return status; |
563 | } | 563 | } |
564 | 564 | ||
565 | static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di) | ||
566 | { | ||
567 | le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1); | ||
568 | } | ||
569 | |||
570 | static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di) | ||
571 | { | ||
572 | return le32_to_cpu(di->id1.journal1.ij_recovery_generation); | ||
573 | } | ||
574 | |||
565 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | 575 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, |
566 | int dirty) | 576 | int dirty, int replayed) |
567 | { | 577 | { |
568 | int status; | 578 | int status; |
569 | unsigned int flags; | 579 | unsigned int flags; |
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | |||
593 | flags &= ~OCFS2_JOURNAL_DIRTY_FL; | 603 | flags &= ~OCFS2_JOURNAL_DIRTY_FL; |
594 | fe->id1.journal1.ij_flags = cpu_to_le32(flags); | 604 | fe->id1.journal1.ij_flags = cpu_to_le32(flags); |
595 | 605 | ||
606 | if (replayed) | ||
607 | ocfs2_bump_recovery_generation(fe); | ||
608 | |||
596 | status = ocfs2_write_block(osb, bh, journal->j_inode); | 609 | status = ocfs2_write_block(osb, bh, journal->j_inode); |
597 | if (status < 0) | 610 | if (status < 0) |
598 | mlog_errno(status); | 611 | mlog_errno(status); |
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
667 | * Do not toggle if flush was unsuccessful otherwise | 680 | * Do not toggle if flush was unsuccessful otherwise |
668 | * will leave dirty metadata in a "clean" journal | 681 | * will leave dirty metadata in a "clean" journal |
669 | */ | 682 | */ |
670 | status = ocfs2_journal_toggle_dirty(osb, 0); | 683 | status = ocfs2_journal_toggle_dirty(osb, 0, 0); |
671 | if (status < 0) | 684 | if (status < 0) |
672 | mlog_errno(status); | 685 | mlog_errno(status); |
673 | } | 686 | } |
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb, | |||
710 | } | 723 | } |
711 | } | 724 | } |
712 | 725 | ||
713 | int ocfs2_journal_load(struct ocfs2_journal *journal, int local) | 726 | int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) |
714 | { | 727 | { |
715 | int status = 0; | 728 | int status = 0; |
716 | struct ocfs2_super *osb; | 729 | struct ocfs2_super *osb; |
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local) | |||
729 | 742 | ||
730 | ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); | 743 | ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); |
731 | 744 | ||
732 | status = ocfs2_journal_toggle_dirty(osb, 1); | 745 | status = ocfs2_journal_toggle_dirty(osb, 1, replayed); |
733 | if (status < 0) { | 746 | if (status < 0) { |
734 | mlog_errno(status); | 747 | mlog_errno(status); |
735 | goto done; | 748 | goto done; |
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) | |||
771 | goto bail; | 784 | goto bail; |
772 | } | 785 | } |
773 | 786 | ||
774 | status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); | 787 | status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0); |
775 | if (status < 0) | 788 | if (status < 0) |
776 | mlog_errno(status); | 789 | mlog_errno(status); |
777 | 790 | ||
@@ -1034,6 +1047,12 @@ restart: | |||
1034 | spin_unlock(&osb->osb_lock); | 1047 | spin_unlock(&osb->osb_lock); |
1035 | mlog(0, "All nodes recovered\n"); | 1048 | mlog(0, "All nodes recovered\n"); |
1036 | 1049 | ||
1050 | /* Refresh all journal recovery generations from disk */ | ||
1051 | status = ocfs2_check_journals_nolocks(osb); | ||
1052 | status = (status == -EROFS) ? 0 : status; | ||
1053 | if (status < 0) | ||
1054 | mlog_errno(status); | ||
1055 | |||
1037 | ocfs2_super_unlock(osb, 1); | 1056 | ocfs2_super_unlock(osb, 1); |
1038 | 1057 | ||
1039 | /* We always run recovery on our own orphan dir - the dead | 1058 | /* We always run recovery on our own orphan dir - the dead |
@@ -1096,6 +1115,42 @@ out: | |||
1096 | mlog_exit_void(); | 1115 | mlog_exit_void(); |
1097 | } | 1116 | } |
1098 | 1117 | ||
1118 | static int ocfs2_read_journal_inode(struct ocfs2_super *osb, | ||
1119 | int slot_num, | ||
1120 | struct buffer_head **bh, | ||
1121 | struct inode **ret_inode) | ||
1122 | { | ||
1123 | int status = -EACCES; | ||
1124 | struct inode *inode = NULL; | ||
1125 | |||
1126 | BUG_ON(slot_num >= osb->max_slots); | ||
1127 | |||
1128 | inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, | ||
1129 | slot_num); | ||
1130 | if (!inode || is_bad_inode(inode)) { | ||
1131 | mlog_errno(status); | ||
1132 | goto bail; | ||
1133 | } | ||
1134 | SET_INODE_JOURNAL(inode); | ||
1135 | |||
1136 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode); | ||
1137 | if (status < 0) { | ||
1138 | mlog_errno(status); | ||
1139 | goto bail; | ||
1140 | } | ||
1141 | |||
1142 | status = 0; | ||
1143 | |||
1144 | bail: | ||
1145 | if (inode) { | ||
1146 | if (status || !ret_inode) | ||
1147 | iput(inode); | ||
1148 | else | ||
1149 | *ret_inode = inode; | ||
1150 | } | ||
1151 | return status; | ||
1152 | } | ||
1153 | |||
1099 | /* Does the actual journal replay and marks the journal inode as | 1154 | /* Does the actual journal replay and marks the journal inode as |
1100 | * clean. Will only replay if the journal inode is marked dirty. */ | 1155 | * clean. Will only replay if the journal inode is marked dirty. */ |
1101 | static int ocfs2_replay_journal(struct ocfs2_super *osb, | 1156 | static int ocfs2_replay_journal(struct ocfs2_super *osb, |
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1109 | struct ocfs2_dinode *fe; | 1164 | struct ocfs2_dinode *fe; |
1110 | journal_t *journal = NULL; | 1165 | journal_t *journal = NULL; |
1111 | struct buffer_head *bh = NULL; | 1166 | struct buffer_head *bh = NULL; |
1167 | u32 slot_reco_gen; | ||
1112 | 1168 | ||
1113 | inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, | 1169 | status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode); |
1114 | slot_num); | 1170 | if (status) { |
1115 | if (inode == NULL) { | ||
1116 | status = -EACCES; | ||
1117 | mlog_errno(status); | 1171 | mlog_errno(status); |
1118 | goto done; | 1172 | goto done; |
1119 | } | 1173 | } |
1120 | if (is_bad_inode(inode)) { | 1174 | |
1121 | status = -EACCES; | 1175 | fe = (struct ocfs2_dinode *)bh->b_data; |
1122 | iput(inode); | 1176 | slot_reco_gen = ocfs2_get_recovery_generation(fe); |
1123 | inode = NULL; | 1177 | brelse(bh); |
1124 | mlog_errno(status); | 1178 | bh = NULL; |
1179 | |||
1180 | /* | ||
1181 | * As the fs recovery is asynchronous, there is a small chance that | ||
1182 | * another node mounted (and recovered) the slot before the recovery | ||
1183 | * thread could get the lock. To handle that, we dirty read the journal | ||
1184 | * inode for that slot to get the recovery generation. If it is | ||
1185 | * different than what we expected, the slot has been recovered. | ||
1186 | * If not, it needs recovery. | ||
1187 | */ | ||
1188 | if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) { | ||
1189 | mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num, | ||
1190 | osb->slot_recovery_generations[slot_num], slot_reco_gen); | ||
1191 | osb->slot_recovery_generations[slot_num] = slot_reco_gen; | ||
1192 | status = -EBUSY; | ||
1125 | goto done; | 1193 | goto done; |
1126 | } | 1194 | } |
1127 | SET_INODE_JOURNAL(inode); | 1195 | |
1196 | /* Continue with recovery as the journal has not yet been recovered */ | ||
1128 | 1197 | ||
1129 | status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 1198 | status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); |
1130 | if (status < 0) { | 1199 | if (status < 0) { |
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1138 | fe = (struct ocfs2_dinode *) bh->b_data; | 1207 | fe = (struct ocfs2_dinode *) bh->b_data; |
1139 | 1208 | ||
1140 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); | 1209 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); |
1210 | slot_reco_gen = ocfs2_get_recovery_generation(fe); | ||
1141 | 1211 | ||
1142 | if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { | 1212 | if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { |
1143 | mlog(0, "No recovery required for node %d\n", node_num); | 1213 | mlog(0, "No recovery required for node %d\n", node_num); |
1214 | /* Refresh recovery generation for the slot */ | ||
1215 | osb->slot_recovery_generations[slot_num] = slot_reco_gen; | ||
1144 | goto done; | 1216 | goto done; |
1145 | } | 1217 | } |
1146 | 1218 | ||
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1188 | flags &= ~OCFS2_JOURNAL_DIRTY_FL; | 1260 | flags &= ~OCFS2_JOURNAL_DIRTY_FL; |
1189 | fe->id1.journal1.ij_flags = cpu_to_le32(flags); | 1261 | fe->id1.journal1.ij_flags = cpu_to_le32(flags); |
1190 | 1262 | ||
1263 | /* Increment recovery generation to indicate successful recovery */ | ||
1264 | ocfs2_bump_recovery_generation(fe); | ||
1265 | osb->slot_recovery_generations[slot_num] = | ||
1266 | ocfs2_get_recovery_generation(fe); | ||
1267 | |||
1191 | status = ocfs2_write_block(osb, bh, inode); | 1268 | status = ocfs2_write_block(osb, bh, inode); |
1192 | if (status < 0) | 1269 | if (status < 0) |
1193 | mlog_errno(status); | 1270 | mlog_errno(status); |
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
1252 | 1329 | ||
1253 | status = ocfs2_replay_journal(osb, node_num, slot_num); | 1330 | status = ocfs2_replay_journal(osb, node_num, slot_num); |
1254 | if (status < 0) { | 1331 | if (status < 0) { |
1332 | if (status == -EBUSY) { | ||
1333 | mlog(0, "Skipping recovery for slot %u (node %u) " | ||
1334 | "as another node has recovered it\n", slot_num, | ||
1335 | node_num); | ||
1336 | status = 0; | ||
1337 | goto done; | ||
1338 | } | ||
1255 | mlog_errno(status); | 1339 | mlog_errno(status); |
1256 | goto done; | 1340 | goto done; |
1257 | } | 1341 | } |
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1334 | { | 1418 | { |
1335 | unsigned int node_num; | 1419 | unsigned int node_num; |
1336 | int status, i; | 1420 | int status, i; |
1421 | struct buffer_head *bh = NULL; | ||
1422 | struct ocfs2_dinode *di; | ||
1337 | 1423 | ||
1338 | /* This is called with the super block cluster lock, so we | 1424 | /* This is called with the super block cluster lock, so we |
1339 | * know that the slot map can't change underneath us. */ | 1425 | * know that the slot map can't change underneath us. */ |
1340 | 1426 | ||
1341 | spin_lock(&osb->osb_lock); | 1427 | spin_lock(&osb->osb_lock); |
1342 | for (i = 0; i < osb->max_slots; i++) { | 1428 | for (i = 0; i < osb->max_slots; i++) { |
1429 | /* Read journal inode to get the recovery generation */ | ||
1430 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); | ||
1431 | if (status) { | ||
1432 | mlog_errno(status); | ||
1433 | goto bail; | ||
1434 | } | ||
1435 | di = (struct ocfs2_dinode *)bh->b_data; | ||
1436 | osb->slot_recovery_generations[i] = | ||
1437 | ocfs2_get_recovery_generation(di); | ||
1438 | brelse(bh); | ||
1439 | bh = NULL; | ||
1440 | |||
1441 | mlog(0, "Slot %u recovery generation is %u\n", i, | ||
1442 | osb->slot_recovery_generations[i]); | ||
1443 | |||
1343 | if (i == osb->slot_num) | 1444 | if (i == osb->slot_num) |
1344 | continue; | 1445 | continue; |
1345 | 1446 | ||
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg) | |||
1603 | return 0; | 1704 | return 0; |
1604 | } | 1705 | } |
1605 | 1706 | ||
1606 | /* Look for a dirty journal without taking any cluster locks. Used for | 1707 | /* Reads all the journal inodes without taking any cluster locks. Used |
1607 | * hard readonly access to determine whether the file system journals | 1708 | * for hard readonly access to determine whether any journal requires |
1608 | * require recovery. */ | 1709 | * recovery. Also used to refresh the recovery generation numbers after |
1710 | * a journal has been recovered by another node. | ||
1711 | */ | ||
1609 | int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) | 1712 | int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) |
1610 | { | 1713 | { |
1611 | int ret = 0; | 1714 | int ret = 0; |
1612 | unsigned int slot; | 1715 | unsigned int slot; |
1613 | struct buffer_head *di_bh; | 1716 | struct buffer_head *di_bh = NULL; |
1614 | struct ocfs2_dinode *di; | 1717 | struct ocfs2_dinode *di; |
1615 | struct inode *journal = NULL; | 1718 | int journal_dirty = 0; |
1616 | 1719 | ||
1617 | for(slot = 0; slot < osb->max_slots; slot++) { | 1720 | for(slot = 0; slot < osb->max_slots; slot++) { |
1618 | journal = ocfs2_get_system_file_inode(osb, | 1721 | ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL); |
1619 | JOURNAL_SYSTEM_INODE, | 1722 | if (ret) { |
1620 | slot); | ||
1621 | if (!journal || is_bad_inode(journal)) { | ||
1622 | ret = -EACCES; | ||
1623 | mlog_errno(ret); | ||
1624 | goto out; | ||
1625 | } | ||
1626 | |||
1627 | di_bh = NULL; | ||
1628 | ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh, | ||
1629 | 0, journal); | ||
1630 | if (ret < 0) { | ||
1631 | mlog_errno(ret); | 1723 | mlog_errno(ret); |
1632 | goto out; | 1724 | goto out; |
1633 | } | 1725 | } |
1634 | 1726 | ||
1635 | di = (struct ocfs2_dinode *) di_bh->b_data; | 1727 | di = (struct ocfs2_dinode *) di_bh->b_data; |
1636 | 1728 | ||
1729 | osb->slot_recovery_generations[slot] = | ||
1730 | ocfs2_get_recovery_generation(di); | ||
1731 | |||
1637 | if (le32_to_cpu(di->id1.journal1.ij_flags) & | 1732 | if (le32_to_cpu(di->id1.journal1.ij_flags) & |
1638 | OCFS2_JOURNAL_DIRTY_FL) | 1733 | OCFS2_JOURNAL_DIRTY_FL) |
1639 | ret = -EROFS; | 1734 | journal_dirty = 1; |
1640 | 1735 | ||
1641 | brelse(di_bh); | 1736 | brelse(di_bh); |
1642 | if (ret) | 1737 | di_bh = NULL; |
1643 | break; | ||
1644 | } | 1738 | } |
1645 | 1739 | ||
1646 | out: | 1740 | out: |
1647 | if (journal) | 1741 | if (journal_dirty) |
1648 | iput(journal); | 1742 | ret = -EROFS; |
1649 | |||
1650 | return ret; | 1743 | return ret; |
1651 | } | 1744 | } |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index db82be2532ed..2178ebffa05f 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, | |||
161 | void ocfs2_journal_shutdown(struct ocfs2_super *osb); | 161 | void ocfs2_journal_shutdown(struct ocfs2_super *osb); |
162 | int ocfs2_journal_wipe(struct ocfs2_journal *journal, | 162 | int ocfs2_journal_wipe(struct ocfs2_journal *journal, |
163 | int full); | 163 | int full); |
164 | int ocfs2_journal_load(struct ocfs2_journal *journal, int local); | 164 | int ocfs2_journal_load(struct ocfs2_journal *journal, int local, |
165 | int replayed); | ||
165 | int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); | 166 | int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); |
166 | void ocfs2_recovery_thread(struct ocfs2_super *osb, | 167 | void ocfs2_recovery_thread(struct ocfs2_super *osb, |
167 | int node_num); | 168 | int node_num); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1cb814be8ef1..7f625f2b1117 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -204,6 +204,8 @@ struct ocfs2_super | |||
204 | 204 | ||
205 | struct ocfs2_slot_info *slot_info; | 205 | struct ocfs2_slot_info *slot_info; |
206 | 206 | ||
207 | u32 *slot_recovery_generations; | ||
208 | |||
207 | spinlock_t node_map_lock; | 209 | spinlock_t node_map_lock; |
208 | 210 | ||
209 | u64 root_blkno; | 211 | u64 root_blkno; |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 3f1945177629..4f619850ccf7 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -660,7 +660,10 @@ struct ocfs2_dinode { | |||
660 | struct { /* Info for journal system | 660 | struct { /* Info for journal system |
661 | inodes */ | 661 | inodes */ |
662 | __le32 ij_flags; /* Mounted, version, etc. */ | 662 | __le32 ij_flags; /* Mounted, version, etc. */ |
663 | __le32 ij_pad; | 663 | __le32 ij_recovery_generation; /* Incremented when the |
664 | journal is recovered | ||
665 | after an unclean | ||
666 | shutdown */ | ||
664 | } journal1; | 667 | } journal1; |
665 | } id1; /* Inode type dependant 1 */ | 668 | } id1; /* Inode type dependant 1 */ |
666 | /*C0*/ union { | 669 | /*C0*/ union { |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2560b33889aa..88255d3f52b4 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1442 | } | 1442 | } |
1443 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | 1443 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); |
1444 | 1444 | ||
1445 | osb->slot_recovery_generations = | ||
1446 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), | ||
1447 | GFP_KERNEL); | ||
1448 | if (!osb->slot_recovery_generations) { | ||
1449 | status = -ENOMEM; | ||
1450 | mlog_errno(status); | ||
1451 | goto bail; | ||
1452 | } | ||
1453 | |||
1445 | init_waitqueue_head(&osb->osb_wipe_event); | 1454 | init_waitqueue_head(&osb->osb_wipe_event); |
1446 | osb->osb_orphan_wipes = kcalloc(osb->max_slots, | 1455 | osb->osb_orphan_wipes = kcalloc(osb->max_slots, |
1447 | sizeof(*osb->osb_orphan_wipes), | 1456 | sizeof(*osb->osb_orphan_wipes), |
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) | |||
1703 | local = ocfs2_mount_local(osb); | 1712 | local = ocfs2_mount_local(osb); |
1704 | 1713 | ||
1705 | /* will play back anything left in the journal. */ | 1714 | /* will play back anything left in the journal. */ |
1706 | status = ocfs2_journal_load(osb->journal, local); | 1715 | status = ocfs2_journal_load(osb->journal, local, dirty); |
1707 | if (status < 0) { | 1716 | if (status < 0) { |
1708 | mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); | 1717 | mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); |
1709 | goto finally; | 1718 | goto finally; |
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
1768 | ocfs2_free_slot_info(osb); | 1777 | ocfs2_free_slot_info(osb); |
1769 | 1778 | ||
1770 | kfree(osb->osb_orphan_wipes); | 1779 | kfree(osb->osb_orphan_wipes); |
1780 | kfree(osb->slot_recovery_generations); | ||
1771 | /* FIXME | 1781 | /* FIXME |
1772 | * This belongs in journal shutdown, but because we have to | 1782 | * This belongs in journal shutdown, but because we have to |
1773 | * allocate osb->journal at the start of ocfs2_initalize_osb(), | 1783 | * allocate osb->journal at the start of ocfs2_initalize_osb(), |