Diffstat (limited to 'fs')
269 files changed, 16596 insertions, 7838 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 9f7270f36b2a..525da2e8f73b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,6 +62,16 @@ source "fs/autofs/Kconfig"
 source "fs/autofs4/Kconfig"
 source "fs/fuse/Kconfig"
 
+config CUSE
+        tristate "Character device in Userspace support"
+        depends on FUSE_FS
+        help
+          This FUSE extension allows character devices to be
+          implemented in userspace.
+
+          If you want to develop or use a userspace character device
+          based on CUSE, answer Y or M.
+
 config GENERIC_ACL
         bool
         select FS_POSIX_ACL
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index e0a85dbeeb88..a6665f37f456 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -53,6 +53,7 @@ struct adfs_dir_ops {
         int (*update)(struct adfs_dir *dir, struct object_info *obj);
         int (*create)(struct adfs_dir *dir, struct object_info *obj);
         int (*remove)(struct adfs_dir *dir, struct object_info *obj);
+        int (*sync)(struct adfs_dir *dir);
         void (*free)(struct adfs_dir *dir);
 };
 
@@ -90,7 +91,8 @@ extern const struct dentry_operations adfs_dentry_operations;
 extern struct adfs_dir_ops adfs_f_dir_ops;
 extern struct adfs_dir_ops adfs_fplus_dir_ops;
 
-extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
+extern int adfs_dir_update(struct super_block *sb, struct object_info *obj,
+                           int wait);
 
 /* file.c */
 extern const struct inode_operations adfs_file_inode_operations;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index e867ccf37246..4d4073447d1a 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -83,7 +83,7 @@ out:
 }
 
 int
-adfs_dir_update(struct super_block *sb, struct object_info *obj)
+adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait)
 {
         int ret = -EINVAL;
 #ifdef CONFIG_ADFS_FS_RW
@@ -106,6 +106,12 @@ adfs_dir_update(struct super_block *sb, struct object_info *obj)
         ret = ops->update(&dir, obj);
         write_unlock(&adfs_dir_lock);
 
+        if (wait) {
+                int err = ops->sync(&dir);
+                if (!ret)
+                        ret = err;
+        }
+
         ops->free(&dir);
 out:
 #endif
@@ -199,7 +205,7 @@ const struct file_operations adfs_dir_operations = {
         .read           = generic_read_dir,
         .llseek         = generic_file_llseek,
         .readdir        = adfs_readdir,
-        .fsync          = file_fsync,
+        .fsync          = simple_fsync,
 };
 
 static int
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index ea7df2146921..31df6adf0de6 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -437,6 +437,22 @@ bad_dir:
 #endif
 }
 
+static int
+adfs_f_sync(struct adfs_dir *dir)
+{
+        int err = 0;
+        int i;
+
+        for (i = dir->nr_buffers - 1; i >= 0; i--) {
+                struct buffer_head *bh = dir->bh[i];
+                sync_dirty_buffer(bh);
+                if (buffer_req(bh) && !buffer_uptodate(bh))
+                        err = -EIO;
+        }
+
+        return err;
+}
+
 static void
 adfs_f_free(struct adfs_dir *dir)
 {
@@ -456,5 +472,6 @@ struct adfs_dir_ops adfs_f_dir_ops = {
         .setpos         = adfs_f_setpos,
         .getnext        = adfs_f_getnext,
         .update         = adfs_f_update,
+        .sync           = adfs_f_sync,
         .free           = adfs_f_free
 };
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 1ec644e32df9..139e0f345f18 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -161,6 +161,22 @@ out:
         return ret;
 }
 
+static int
+adfs_fplus_sync(struct adfs_dir *dir)
+{
+        int err = 0;
+        int i;
+
+        for (i = dir->nr_buffers - 1; i >= 0; i--) {
+                struct buffer_head *bh = dir->bh[i];
+                sync_dirty_buffer(bh);
+                if (buffer_req(bh) && !buffer_uptodate(bh))
+                        err = -EIO;
+        }
+
+        return err;
+}
+
 static void
 adfs_fplus_free(struct adfs_dir *dir)
 {
@@ -175,5 +191,6 @@ struct adfs_dir_ops adfs_fplus_dir_ops = {
         .read           = adfs_fplus_read,
         .setpos         = adfs_fplus_setpos,
         .getnext        = adfs_fplus_getnext,
+        .sync           = adfs_fplus_sync,
         .free           = adfs_fplus_free
 };
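The adfs_f_sync() and adfs_fplus_sync() bodies added above are byte-for-byte identical: walk the directory's buffers back to front, force each one out with sync_dirty_buffer(), and report -EIO if a write was issued but did not complete cleanly. A shared helper along these lines would capture the pattern once (a sketch against the buffer-head API, not part of the patch):

static int adfs_dir_sync_buffers(struct adfs_dir *dir)
{
        int err = 0;
        int i;

        for (i = dir->nr_buffers - 1; i >= 0; i--) {
                struct buffer_head *bh = dir->bh[i];

                /* submit the buffer if dirty and wait for completion */
                sync_dirty_buffer(bh);
                /* I/O was requested but the buffer is no longer valid:
                 * the write-back failed */
                if (buffer_req(bh) && !buffer_uptodate(bh))
                        err = -EIO;
        }
        return err;
}

Both .sync methods could then point at the one helper; the patch keeps two copies, presumably to keep each directory format self-contained in its own file.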
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 36e381c6a99a..8224d54a2afb 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -30,7 +30,7 @@ const struct file_operations adfs_file_operations = {
         .read           = do_sync_read,
         .aio_read       = generic_file_aio_read,
         .mmap           = generic_file_mmap,
-        .fsync          = file_fsync,
+        .fsync          = simple_fsync,
         .write          = do_sync_write,
         .aio_write      = generic_file_aio_write,
         .splice_read    = generic_file_splice_read,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index e647200262a2..05b3a677201d 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -376,7 +376,7 @@ out:
  * The adfs-specific inode data has already been updated by
  * adfs_notify_change()
  */
-int adfs_write_inode(struct inode *inode, int unused)
+int adfs_write_inode(struct inode *inode, int wait)
 {
         struct super_block *sb = inode->i_sb;
         struct object_info obj;
@@ -391,7 +391,7 @@ int adfs_write_inode(struct inode *inode, int unused)
         obj.attr = ADFS_I(inode)->attr;
         obj.size = inode->i_size;
 
-        ret = adfs_dir_update(sb, &obj);
+        ret = adfs_dir_update(sb, &obj, wait);
         unlock_kernel();
         return ret;
 }
diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index 92ab4fbc2031..568081b93f73 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -62,7 +62,7 @@ static DEFINE_RWLOCK(adfs_map_lock);
 #define GET_FRAG_ID(_map,_start,_idmask)                        \
         ({                                                      \
                 unsigned char *_m = _map + (_start >> 3);       \
-                u32 _frag = get_unaligned((u32 *)_m);           \
+                u32 _frag = get_unaligned_le32(_m);             \
                 _frag >>= (_start & 7);                         \
                 _frag & _idmask;                                \
         })
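The map.c hunk above replaces a cast-and-dereference with get_unaligned_le32(). That matters twice over: _map + (_start >> 3) can land on any byte boundary, and the on-disk fragment ID is little-endian whatever the host byte order is. A runnable userspace sketch of the equivalent byte-wise load (load_le32 is a hypothetical stand-in for the kernel helper):

#include <stdint.h>
#include <stdio.h>

/* Assemble a 32-bit little-endian value one byte at a time: safe at
 * any alignment and independent of host endianness. */
static uint32_t load_le32(const unsigned char *p)
{
        return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
               ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
        unsigned char map[] = { 0xff, 0x78, 0x56, 0x34, 0x12, 0xff };

        /* read from an odd offset, as GET_FRAG_ID does with _start >> 3 */
        printf("0x%08x\n", load_le32(map + 1));        /* prints 0x12345678 */
        return 0;
}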
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index dd9becca4241..0ec5aaf47aa7 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -132,11 +132,15 @@ static void adfs_put_super(struct super_block *sb)
         int i;
         struct adfs_sb_info *asb = ADFS_SB(sb);
 
+        lock_kernel();
+
         for (i = 0; i < asb->s_map_size; i++)
                 brelse(asb->s_map[i].dm_bh);
         kfree(asb->s_map);
         kfree(asb);
         sb->s_fs_info = NULL;
+
+        unlock_kernel();
 }
 
 static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1a2d5e3c7f4e..e511dc621a2e 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -182,6 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent
 
 void affs_free_prealloc(struct inode *inode);
 extern void affs_truncate(struct inode *);
+int affs_file_fsync(struct file *, struct dentry *, int);
 
 /* dir.c */
 
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 7b36904dbeac..8ca8f3a55599 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -21,7 +21,7 @@ const struct file_operations affs_dir_operations = {
         .read           = generic_read_dir,
         .llseek         = generic_file_llseek,
         .readdir        = affs_readdir,
-        .fsync          = file_fsync,
+        .fsync          = affs_file_fsync,
 };
 
 /*
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 9246cb4aa018..184e55c1c9ba 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -34,7 +34,7 @@ const struct file_operations affs_file_operations = {
         .mmap           = generic_file_mmap,
         .open           = affs_file_open,
         .release        = affs_file_release,
-        .fsync          = file_fsync,
+        .fsync          = affs_file_fsync,
         .splice_read    = generic_file_splice_read,
 };
 
@@ -915,3 +915,15 @@ affs_truncate(struct inode *inode)
         }
         affs_free_prealloc(inode);
 }
+
+int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+        struct inode *inode = dentry->d_inode;
+        int ret, err;
+
+        ret = write_inode_now(inode, 0);
+        err = sync_blockdev(inode->i_sb->s_bdev);
+        if (!ret)
+                ret = err;
+        return ret;
+}
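The affs_file_fsync() added above follows a common shape: write the inode out, then flush the backing block device, and report the first error seen so a later success cannot hide an earlier failure. A runnable userspace analogue of the same two-step, keep-the-first-error idiom (flush_both is illustrative, not an AFFS API):

#include <stdio.h>
#include <unistd.h>

/* Step one flushes the application's buffers, step two the kernel's;
 * both always run, but the first failure wins. */
static int flush_both(FILE *f)
{
        int ret = fflush(f) ? -1 : 0;
        int err = fsync(fileno(f)) ? -1 : 0;

        if (!ret)
                ret = err;
        return ret;
}

int main(void)
{
        FILE *f = tmpfile();

        if (!f)
                return 1;
        fputs("hello\n", f);
        return flush_both(f) ? 1 : 0;
}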
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 63f5183f263b..104fdcb3a7fc 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -16,6 +16,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 #include "affs.h"
 
 extern struct timezone sys_tz;
@@ -24,49 +25,67 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
+affs_commit_super(struct super_block *sb, int clean)
+{
+        struct affs_sb_info *sbi = AFFS_SB(sb);
+        struct buffer_head *bh = sbi->s_root_bh;
+        struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
+
+        tail->bm_flag = cpu_to_be32(clean);
+        secs_to_datestamp(get_seconds(), &tail->disk_change);
+        affs_fix_checksum(sb, bh);
+        mark_buffer_dirty(bh);
+}
+
+static void
 affs_put_super(struct super_block *sb)
 {
         struct affs_sb_info *sbi = AFFS_SB(sb);
         pr_debug("AFFS: put_super()\n");
 
-        if (!(sb->s_flags & MS_RDONLY)) {
-                AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1);
-                secs_to_datestamp(get_seconds(),
-                                  &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
-                affs_fix_checksum(sb, sbi->s_root_bh);
-                mark_buffer_dirty(sbi->s_root_bh);
-        }
+        lock_kernel();
+
+        if (!(sb->s_flags & MS_RDONLY))
+                affs_commit_super(sb, 1);
 
         kfree(sbi->s_prefix);
         affs_free_bitmap(sb);
         affs_brelse(sbi->s_root_bh);
         kfree(sbi);
         sb->s_fs_info = NULL;
-        return;
+
+        unlock_kernel();
 }
 
 static void
 affs_write_super(struct super_block *sb)
 {
         int clean = 2;
-        struct affs_sb_info *sbi = AFFS_SB(sb);
 
+        lock_super(sb);
         if (!(sb->s_flags & MS_RDONLY)) {
                 //      if (sbi->s_bitmap[i].bm_bh) {
                 //              if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
                 //                      clean = 0;
-                AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(clean);
-                secs_to_datestamp(get_seconds(),
-                                  &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
-                affs_fix_checksum(sb, sbi->s_root_bh);
-                mark_buffer_dirty(sbi->s_root_bh);
+                affs_commit_super(sb, clean);
                 sb->s_dirt = !clean;    /* redo until bitmap synced */
         } else
                 sb->s_dirt = 0;
+        unlock_super(sb);
 
         pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
 }
 
+static int
+affs_sync_fs(struct super_block *sb, int wait)
+{
+        lock_super(sb);
+        affs_commit_super(sb, 2);
+        sb->s_dirt = 0;
+        unlock_super(sb);
+        return 0;
+}
+
 static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
@@ -124,6 +143,7 @@ static const struct super_operations affs_sops = {
         .clear_inode    = affs_clear_inode,
         .put_super      = affs_put_super,
         .write_super    = affs_write_super,
+        .sync_fs        = affs_sync_fs,
         .statfs         = affs_statfs,
         .remount_fs     = affs_remount,
         .show_options   = generic_show_options,
@@ -507,6 +527,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
                 kfree(new_opts);
                 return -EINVAL;
         }
+        lock_kernel();
         replace_mount_options(sb, new_opts);
 
         sbi->s_flags = mount_flags;
@@ -514,8 +535,10 @@ affs_remount(struct super_block *sb, int *flags, char *data)
         sbi->s_uid = uid;
         sbi->s_gid = gid;
 
-        if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+        if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+                unlock_kernel();
                 return 0;
+        }
         if (*flags & MS_RDONLY) {
                 sb->s_dirt = 1;
                 while (sb->s_dirt)
@@ -524,6 +547,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
         } else
                 res = affs_init_bitmap(sb, flags);
 
+        unlock_kernel();
         return res;
 }
 
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2b9e2d03a390..c52be53f6946 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -244,7 +244,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
         case -EBUSY:
                 /* someone else made a mount here whilst we were busy */
                 while (d_mountpoint(nd->path.dentry) &&
-                       follow_down(&nd->path.mnt, &nd->path.dentry))
+                       follow_down(&nd->path))
                         ;
                 err = 0;
         default:
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 76828e5f8a39..ad0514d0115f 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -440,8 +440,12 @@ static void afs_put_super(struct super_block *sb)
 
         _enter("");
 
+        lock_kernel();
+
         afs_put_volume(as->volume);
 
+        unlock_kernel();
+
         _leave("");
 }
 
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 4eb4d8dfb2f1..2316e944a109 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -85,13 +85,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
                 }
                 path.mnt = mnt;
                 path_get(&path);
-                if (!follow_down(&path.mnt, &path.dentry)) {
+                if (!follow_down(&path)) {
                         path_put(&path);
                         DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
                         continue;
                 }
-                while (d_mountpoint(path.dentry) &&
-                       follow_down(&path.mnt, &path.dentry))
+                while (d_mountpoint(path.dentry) && follow_down(&path));
                         ;
                 umount_ok = may_umount(path.mnt);
                 path_put(&path);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b7ff33c63101..8f7cdde41733 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -223,12 +223,12 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 
-static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static inline int autofs4_follow_mount(struct path *path)
 {
         int res = 0;
 
-        while (d_mountpoint(*dentry)) {
-                int followed = follow_down(mnt, dentry);
+        while (d_mountpoint(path->dentry)) {
+                int followed = follow_down(path);
                 if (!followed)
                         break;
                 res = 1;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 84168c0dcc2d..f3da2eb51f56 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -192,77 +192,42 @@ static int autofs_dev_ioctl_protosubver(struct file *fp,
         return 0;
 }
 
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested device number (aka. new_encode_dev(sb->s_dev).
- */
-static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno)
+static int find_autofs_mount(const char *pathname,
+                             struct path *res,
+                             int test(struct path *path, void *data),
+                             void *data)
 {
-        struct dentry *dentry;
-        struct inode *inode;
-        struct super_block *sb;
-        dev_t s_dev;
-        unsigned int err;
-
+        struct path path;
+        int err = kern_path(pathname, 0, &path);
+        if (err)
+                return err;
         err = -ENOENT;
-
-        /* Lookup the dentry name at the base of our mount point */
-        dentry = d_lookup(nd->path.dentry, &nd->last);
-        if (!dentry)
-                goto out;
-
-        dput(nd->path.dentry);
-        nd->path.dentry = dentry;
-
-        /* And follow the mount stack looking for our autofs mount */
-        while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
-                inode = nd->path.dentry->d_inode;
-                if (!inode)
-                        break;
-
-                sb = inode->i_sb;
-                s_dev = new_encode_dev(sb->s_dev);
-                if (devno == s_dev) {
-                        if (sb->s_magic == AUTOFS_SUPER_MAGIC) {
+        while (path.dentry == path.mnt->mnt_root) {
+                if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) {
+                        if (test(&path, data)) {
+                                path_get(&path);
+                                if (!err) /* already found some */
+                                        path_put(res);
+                                *res = path;
                                 err = 0;
-                                break;
                         }
                 }
+                if (!follow_up(&path))
+                        break;
         }
-out:
+        path_put(&path);
         return err;
 }
 
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested mount type (ie. indirect, direct or offset).
- */
-static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type)
+static int test_by_dev(struct path *path, void *p)
 {
-        struct dentry *dentry;
-        struct autofs_info *ino;
-        unsigned int err;
-
-        err = -ENOENT;
-
-        /* Lookup the dentry name at the base of our mount point */
-        dentry = d_lookup(nd->path.dentry, &nd->last);
-        if (!dentry)
-                goto out;
-
-        dput(nd->path.dentry);
-        nd->path.dentry = dentry;
+        return path->mnt->mnt_sb->s_dev == *(dev_t *)p;
+}
 
-        /* And follow the mount stack looking for our autofs mount */
-        while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
-                ino = autofs4_dentry_ino(nd->path.dentry);
-                if (ino && ino->sbi->type & type) {
-                        err = 0;
-                        break;
-                }
-        }
-out:
-        return err;
+static int test_by_type(struct path *path, void *p)
+{
+        struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
+        return ino && ino->sbi->type & *(unsigned *)p;
 }
 
 static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
@@ -283,31 +248,25 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
  * Open a file descriptor on the autofs mount point corresponding
  * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
  */
-static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
+static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
 {
-        struct file *filp;
-        struct nameidata nd;
         int err, fd;
 
         fd = get_unused_fd();
         if (likely(fd >= 0)) {
-                /* Get nameidata of the parent directory */
-                err = path_lookup(path, LOOKUP_PARENT, &nd);
+                struct file *filp;
+                struct path path;
+
+                err = find_autofs_mount(name, &path, test_by_dev, &devid);
                 if (err)
                         goto out;
 
                 /*
-                 * Search down, within the parent, looking for an
-                 * autofs super block that has the device number
+                 * Find autofs super block that has the device number
                  * corresponding to the autofs fs we want to open.
                  */
-                err = autofs_dev_ioctl_find_super(&nd, devid);
-                if (err) {
-                        path_put(&nd.path);
-                        goto out;
-                }
 
-                filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+                filp = dentry_open(path.dentry, path.mnt, O_RDONLY,
                                    current_cred());
                 if (IS_ERR(filp)) {
                         err = PTR_ERR(filp);
@@ -340,7 +299,7 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
         param->ioctlfd = -1;
 
         path = param->path;
-        devid = param->openmount.devid;
+        devid = new_decode_dev(param->openmount.devid);
 
         err = 0;
         fd = autofs_dev_ioctl_open_mountpoint(path, devid);
@@ -475,8 +434,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
                                       struct autofs_dev_ioctl *param)
 {
         struct autofs_info *ino;
-        struct nameidata nd;
-        const char *path;
+        struct path path;
         dev_t devid;
         int err = -ENOENT;
 
@@ -485,32 +443,24 @@ static int autofs_dev_ioctl_requester(struct file *fp,
                 goto out;
         }
 
-        path = param->path;
-        devid = new_encode_dev(sbi->sb->s_dev);
+        devid = sbi->sb->s_dev;
 
         param->requester.uid = param->requester.gid = -1;
 
-        /* Get nameidata of the parent directory */
-        err = path_lookup(path, LOOKUP_PARENT, &nd);
+        err = find_autofs_mount(param->path, &path, test_by_dev, &devid);
         if (err)
                 goto out;
 
-        err = autofs_dev_ioctl_find_super(&nd, devid);
-        if (err)
-                goto out_release;
-
-        ino = autofs4_dentry_ino(nd.path.dentry);
+        ino = autofs4_dentry_ino(path.dentry);
         if (ino) {
                 err = 0;
-                autofs4_expire_wait(nd.path.dentry);
+                autofs4_expire_wait(path.dentry);
                 spin_lock(&sbi->fs_lock);
                 param->requester.uid = ino->uid;
                 param->requester.gid = ino->gid;
                 spin_unlock(&sbi->fs_lock);
         }
-
-out_release:
-        path_put(&nd.path);
+        path_put(&path);
 out:
         return err;
 }
@@ -569,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
                                          struct autofs_sb_info *sbi,
                                          struct autofs_dev_ioctl *param)
 {
-        struct nameidata nd;
-        const char *path;
+        struct path path;
+        const char *name;
         unsigned int type;
         unsigned int devid, magic;
         int err = -ENOENT;
@@ -580,71 +530,46 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
                 goto out;
         }
 
-        path = param->path;
+        name = param->path;
         type = param->ismountpoint.in.type;
 
         param->ismountpoint.out.devid = devid = 0;
         param->ismountpoint.out.magic = magic = 0;
 
         if (!fp || param->ioctlfd == -1) {
-                if (autofs_type_any(type)) {
-                        struct super_block *sb;
-
-                        err = path_lookup(path, LOOKUP_FOLLOW, &nd);
-                        if (err)
-                                goto out;
-
-                        sb = nd.path.dentry->d_sb;
-                        devid = new_encode_dev(sb->s_dev);
-                } else {
-                        struct autofs_info *ino;
-
-                        err = path_lookup(path, LOOKUP_PARENT, &nd);
-                        if (err)
-                                goto out;
-
-                        err = autofs_dev_ioctl_find_sbi_type(&nd, type);
-                        if (err)
-                                goto out_release;
-
-                        ino = autofs4_dentry_ino(nd.path.dentry);
-                        devid = autofs4_get_dev(ino->sbi);
-                }
-
+                if (autofs_type_any(type))
+                        err = kern_path(name, LOOKUP_FOLLOW, &path);
+                else
+                        err = find_autofs_mount(name, &path, test_by_type, &type);
+                if (err)
+                        goto out;
+                devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
                 err = 0;
-                if (nd.path.dentry->d_inode &&
-                    nd.path.mnt->mnt_root == nd.path.dentry) {
+                if (path.dentry->d_inode &&
+                    path.mnt->mnt_root == path.dentry) {
                         err = 1;
-                        magic = nd.path.dentry->d_inode->i_sb->s_magic;
+                        magic = path.dentry->d_inode->i_sb->s_magic;
                 }
         } else {
-                dev_t dev = autofs4_get_dev(sbi);
+                dev_t dev = sbi->sb->s_dev;
 
-                err = path_lookup(path, LOOKUP_PARENT, &nd);
+                err = find_autofs_mount(name, &path, test_by_dev, &dev);
                 if (err)
                         goto out;
 
-                err = autofs_dev_ioctl_find_super(&nd, dev);
-                if (err)
-                        goto out_release;
-
-                devid = dev;
+                devid = new_encode_dev(dev);
 
-                err = have_submounts(nd.path.dentry);
+                err = have_submounts(path.dentry);
 
-                if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
-                        if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
-                                struct inode *inode = nd.path.dentry->d_inode;
-                                magic = inode->i_sb->s_magic;
-                        }
-                }
+                if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) {
+                        if (follow_down(&path))
+                                magic = path.mnt->mnt_sb->s_magic;
+                }
         }
 
         param->ismountpoint.out.devid = devid;
         param->ismountpoint.out.magic = magic;
-
-out_release:
-        path_put(&nd.path);
+        path_put(&path);
 out:
         return err;
 }
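The dev-ioctl rewrite above folds two near-identical mount-stack walkers into one find_autofs_mount() parameterized by a predicate plus an opaque cookie (test_by_dev, test_by_type). The same callback idiom in plain, runnable C, with every name here illustrative rather than a kernel API:

#include <stdio.h>

struct mnt { int dev; unsigned type; };

static int test_by_dev(const struct mnt *m, void *p)
{
        return m->dev == *(int *)p;
}

static int test_by_type(const struct mnt *m, void *p)
{
        return (m->type & *(unsigned *)p) != 0;
}

/* Generic walk (a stand-in for the follow_up() loop): the match
 * policy lives entirely in the caller-supplied test(). */
static const struct mnt *find_mnt(const struct mnt *tab, int n,
                                  int (*test)(const struct mnt *, void *),
                                  void *data)
{
        for (int i = 0; i < n; i++)
                if (test(&tab[i], data))
                        return &tab[i];
        return NULL;
}

int main(void)
{
        struct mnt tab[] = { { 10, 1 }, { 20, 2 } };
        int dev = 20;
        unsigned type = 1;

        printf("%d\n", find_mnt(tab, 2, test_by_dev, &dev)->dev);       /* 20 */
        printf("%u\n", find_mnt(tab, 2, test_by_type, &type)->type);    /* 1 */
        return 0;
}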
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3077d8f16523..aa39ae83f019 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -48,19 +48,19 @@ static inline int autofs4_can_expire(struct dentry *dentry,
 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
         struct dentry *top = dentry;
+        struct path path = {.mnt = mnt, .dentry = dentry};
         int status = 1;
 
         DPRINTK("dentry %p %.*s",
                 dentry, (int)dentry->d_name.len, dentry->d_name.name);
 
-        mntget(mnt);
-        dget(dentry);
+        path_get(&path);
 
-        if (!follow_down(&mnt, &dentry))
+        if (!follow_down(&path))
                 goto done;
 
-        if (is_autofs4_dentry(dentry)) {
-                struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+        if (is_autofs4_dentry(path.dentry)) {
+                struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb);
 
                 /* This is an autofs submount, we can't expire it */
                 if (autofs_type_indirect(sbi->type))
@@ -70,7 +70,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
          * Otherwise it's an offset mount and we need to check
          * if we can umount its mount, if there is one.
          */
-        if (!d_mountpoint(dentry)) {
+        if (!d_mountpoint(path.dentry)) {
                 status = 0;
                 goto done;
         }
@@ -86,8 +86,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
         status = 0;
 done:
         DPRINTK("returning = %d", status);
-        dput(dentry);
-        mntput(mnt);
+        path_put(&path);
         return status;
 }
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e383bf0334f1..b96a3c57359d 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -181,7 +181,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
                 nd->flags);
         /*
          * For an expire of a covered direct or offset mount we need
-         * to beeak out of follow_down() at the autofs mount trigger
+         * to break out of follow_down() at the autofs mount trigger
          * (d_mounted--), so we can see the expiring flag, and manage
          * the blocking and following here until the expire is completed.
          */
@@ -190,7 +190,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
                 if (ino->flags & AUTOFS_INF_EXPIRING) {
                         spin_unlock(&sbi->fs_lock);
                         /* Follow down to our covering mount. */
-                        if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+                        if (!follow_down(&nd->path))
                                 goto done;
                         goto follow;
                 }
@@ -230,8 +230,7 @@ follow:
          * to follow it.
          */
         if (d_mountpoint(dentry)) {
-                if (!autofs4_follow_mount(&nd->path.mnt,
-                                          &nd->path.dentry)) {
+                if (!autofs4_follow_mount(&nd->path)) {
                         status = -ENOENT;
                         goto out_error;
                 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 76afd0d6b86c..9367b6297d84 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -737,6 +737,8 @@ parse_options(char *options, befs_mount_options * opts)
 static void
 befs_put_super(struct super_block *sb)
 {
+        lock_kernel();
+
         kfree(BEFS_SB(sb)->mount_opts.iocharset);
         BEFS_SB(sb)->mount_opts.iocharset = NULL;
 
@@ -747,7 +749,8 @@ befs_put_super(struct super_block *sb)
 
         kfree(sb->s_fs_info);
         sb->s_fs_info = NULL;
-        return;
+
+        unlock_kernel();
 }
 
 /* Allocate private field of the superblock, fill it.
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 4dd1b623f937..54bd07d44e68 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -79,7 +79,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
 const struct file_operations bfs_dir_operations = {
         .read           = generic_read_dir,
         .readdir        = bfs_readdir,
-        .fsync          = file_fsync,
+        .fsync          = simple_fsync,
         .llseek         = generic_file_llseek,
 };
 
@@ -205,7 +205,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
                 inode->i_nlink = 1;
         }
         de->ino = 0;
-        mark_buffer_dirty(bh);
+        mark_buffer_dirty_inode(bh, dir);
         dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
         mark_inode_dirty(dir);
         inode->i_ctime = dir->i_ctime;
@@ -267,7 +267,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 new_inode->i_ctime = CURRENT_TIME_SEC;
                 inode_dec_link_count(new_inode);
         }
-        mark_buffer_dirty(old_bh);
+        mark_buffer_dirty_inode(old_bh, old_dir);
         error = 0;
 
 end_rename:
@@ -320,7 +320,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name,
                                 for (i = 0; i < BFS_NAMELEN; i++)
                                         de->name[i] =
                                                 (i < namelen) ? name[i] : 0;
-                                mark_buffer_dirty(bh);
+                                mark_buffer_dirty_inode(bh, dir);
                                 brelse(bh);
                                 return 0;
                         }
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index cc4062d12ca2..6f60336c6628 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,6 +30,7 @@ MODULE_LICENSE("GPL");
 #define dprintf(x...)
 #endif
 
+static void bfs_write_super(struct super_block *s);
 void dump_imap(const char *prefix, struct super_block *s);
 
 struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -97,14 +98,15 @@ error:
         return ERR_PTR(-EIO);
 }
 
-static int bfs_write_inode(struct inode *inode, int unused)
+static int bfs_write_inode(struct inode *inode, int wait)
 {
+        struct bfs_sb_info *info = BFS_SB(inode->i_sb);
         unsigned int ino = (u16)inode->i_ino;
         unsigned long i_sblock;
         struct bfs_inode *di;
         struct buffer_head *bh;
         int block, off;
-        struct bfs_sb_info *info = BFS_SB(inode->i_sb);
+        int err = 0;
 
         dprintf("ino=%08x\n", ino);
 
@@ -145,9 +147,14 @@ static int bfs_write_inode(struct inode *inode, int unused)
         di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
 
         mark_buffer_dirty(bh);
+        if (wait) {
+                sync_dirty_buffer(bh);
+                if (buffer_req(bh) && !buffer_uptodate(bh))
+                        err = -EIO;
+        }
         brelse(bh);
         mutex_unlock(&info->bfs_lock);
-        return 0;
+        return err;
 }
 
 static void bfs_delete_inode(struct inode *inode)
@@ -209,6 +216,26 @@ static void bfs_delete_inode(struct inode *inode)
         clear_inode(inode);
 }
 
+static int bfs_sync_fs(struct super_block *sb, int wait)
+{
+        struct bfs_sb_info *info = BFS_SB(sb);
+
+        mutex_lock(&info->bfs_lock);
+        mark_buffer_dirty(info->si_sbh);
+        sb->s_dirt = 0;
+        mutex_unlock(&info->bfs_lock);
+
+        return 0;
+}
+
+static void bfs_write_super(struct super_block *sb)
+{
+        if (!(sb->s_flags & MS_RDONLY))
+                bfs_sync_fs(sb, 1);
+        else
+                sb->s_dirt = 0;
+}
+
 static void bfs_put_super(struct super_block *s)
 {
         struct bfs_sb_info *info = BFS_SB(s);
@@ -216,11 +243,18 @@ static void bfs_put_super(struct super_block *s)
         if (!info)
                 return;
 
+        lock_kernel();
+
+        if (s->s_dirt)
+                bfs_write_super(s);
+
         brelse(info->si_sbh);
         mutex_destroy(&info->bfs_lock);
         kfree(info->si_imap);
         kfree(info);
         s->s_fs_info = NULL;
+
+        unlock_kernel();
 }
 
 static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -240,17 +274,6 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
         return 0;
 }
 
-static void bfs_write_super(struct super_block *s)
-{
-        struct bfs_sb_info *info = BFS_SB(s);
-
-        mutex_lock(&info->bfs_lock);
-        if (!(s->s_flags & MS_RDONLY))
-                mark_buffer_dirty(info->si_sbh);
-        s->s_dirt = 0;
-        mutex_unlock(&info->bfs_lock);
-}
-
 static struct kmem_cache *bfs_inode_cachep;
 
 static struct inode *bfs_alloc_inode(struct super_block *sb)
@@ -298,6 +321,7 @@ static const struct super_operations bfs_sops = {
         .delete_inode   = bfs_delete_inode,
         .put_super      = bfs_put_super,
         .write_super    = bfs_write_super,
+        .sync_fs        = bfs_sync_fs,
         .statfs         = bfs_statfs,
 };
 
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,10 +26,9 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <scsi/sg.h>            /* for struct sg_iovec */
 
-DEFINE_TRACE(block_split);
+#include <trace/events/block.h>
 
 /*
  * Test patch to inline a certain number of bi_io_vec's inside the bio
@@ -499,11 +498,11 @@ int bio_get_nr_vecs(struct block_device *bdev)
         struct request_queue *q = bdev_get_queue(bdev);
         int nr_pages;
 
-        nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-        if (nr_pages > q->max_phys_segments)
-                nr_pages = q->max_phys_segments;
-        if (nr_pages > q->max_hw_segments)
-                nr_pages = q->max_hw_segments;
+        nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+        if (nr_pages > queue_max_phys_segments(q))
+                nr_pages = queue_max_phys_segments(q);
+        if (nr_pages > queue_max_hw_segments(q))
+                nr_pages = queue_max_hw_segments(q);
 
         return nr_pages;
 }
@@ -562,8 +561,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
          * make this too complex.
          */
 
-        while (bio->bi_phys_segments >= q->max_phys_segments
-               || bio->bi_phys_segments >= q->max_hw_segments) {
+        while (bio->bi_phys_segments >= queue_max_phys_segments(q)
+               || bio->bi_phys_segments >= queue_max_hw_segments(q)) {
 
                 if (retried_segments)
                         return 0;
@@ -634,7 +633,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
                     unsigned int len, unsigned int offset)
 {
-        return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors);
+        return __bio_add_page(q, bio, page, len, offset,
+                              queue_max_hw_sectors(q));
 }
 
 /**
@@ -654,7 +654,7 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
                  unsigned int offset)
 {
         struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-        return __bio_add_page(q, bio, page, len, offset, q->max_sectors);
+        return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
 }
 
 struct bio_map_data {
@@ -721,7 +721,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
 
                 while (bv_len && iov_idx < iov_count) {
                         unsigned int bytes;
-                        char *iov_addr;
+                        char __user *iov_addr;
 
                         bytes = min_t(unsigned int,
                                       iov[iov_idx].iov_len - iov_off, bv_len);
@@ -1201,7 +1201,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
                 char *addr = page_address(bvec->bv_page);
                 int len = bmd->iovecs[i].bv_len;
 
-                if (read && !err)
+                if (read)
                         memcpy(p, addr, len);
 
                 __free_page(bvec->bv_page);
@@ -1490,11 +1490,12 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 sector_t bio_sector_offset(struct bio *bio, unsigned short index,
                            unsigned int offset)
 {
-        unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+        unsigned int sector_sz;
         struct bio_vec *bv;
         sector_t sectors;
         int i;
 
+        sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
         sectors = 0;
 
         if (index >= bio->bi_idx)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f45dbc18dd17..3a6d4fb2a329 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -25,6 +25,7 @@
 #include <linux/uio.h>
 #include <linux/namei.h>
 #include <linux/log2.h>
+#include <linux/kmemleak.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -76,7 +77,7 @@ int set_blocksize(struct block_device *bdev, int size)
                 return -EINVAL;
 
         /* Size cannot be smaller than the size supported by the device */
-        if (size < bdev_hardsect_size(bdev))
+        if (size < bdev_logical_block_size(bdev))
                 return -EINVAL;
 
         /* Don't change the size if it is same as current */
@@ -106,7 +107,7 @@ EXPORT_SYMBOL(sb_set_blocksize);
 
 int sb_min_blocksize(struct super_block *sb, int size)
 {
-        int minsize = bdev_hardsect_size(sb->s_bdev);
+        int minsize = bdev_logical_block_size(sb->s_bdev);
         if (size < minsize)
                 size = minsize;
         return sb_set_blocksize(sb, size);
@@ -175,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                                 iov, offset, nr_segs, blkdev_get_blocks, NULL);
 }
 
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+        if (!bdev)
+                return 0;
+        if (!wait)
+                return filemap_flush(bdev->bd_inode->i_mapping);
+        return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
 /*
  * Write out and wait upon all the dirty data associated with a block
  * device via its mapping. Does not take the superblock lock.
  */
 int sync_blockdev(struct block_device *bdev)
 {
-        int ret = 0;
-
-        if (bdev)
-                ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-        return ret;
+        return __sync_blockdev(bdev, 1);
 }
 EXPORT_SYMBOL(sync_blockdev);
 
@@ -198,7 +204,7 @@ int fsync_bdev(struct block_device *bdev)
 {
         struct super_block *sb = get_super(bdev);
         if (sb) {
-                int res = fsync_super(sb);
+                int res = sync_filesystem(sb);
                 drop_super(sb);
                 return res;
         }
@@ -240,7 +246,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
         sb->s_frozen = SB_FREEZE_WRITE;
         smp_wmb();
 
-        __fsync_super(sb);
+        sync_filesystem(sb);
 
         sb->s_frozen = SB_FREEZE_TRANS;
         smp_wmb();
@@ -492,6 +498,11 @@ void __init bdev_cache_init(void)
         bd_mnt = kern_mount(&bd_type);
         if (IS_ERR(bd_mnt))
                 panic("Cannot create bdev pseudo-fs");
+        /*
+         * This vfsmount structure is only used to obtain the
+         * blockdev_superblock, so tell kmemleak not to report it.
+         */
+        kmemleak_not_leak(bd_mnt);
         blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
 }
 
@@ -1111,7 +1122,7 @@ EXPORT_SYMBOL(check_disk_change);
 
 void bd_set_size(struct block_device *bdev, loff_t size)
 {
-        unsigned bsize = bdev_hardsect_size(bdev);
+        unsigned bsize = bdev_logical_block_size(bdev);
 
         bdev->bd_inode->i_size = size;
         while (bsize < PAGE_CACHE_SIZE) {
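The __sync_blockdev() added above lets callers choose between a cheap pass (filemap_flush() starts writeback and returns) and a waiting pass (filemap_write_and_wait() blocks until the pages are on disk). A sketch of the two-phase usage this enables (kernel context, illustrative only; sync_bdev_two_pass is not a real helper):

static void sync_bdev_two_pass(struct block_device *bdev)
{
        __sync_blockdev(bdev, 0);       /* kick off writeback, don't wait */
        /* ... write out inodes and the superblock while I/O proceeds ... */
        __sync_blockdev(bdev, 1);       /* second pass: wait for completion */
}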
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 94212844a9bc..a35eb36b32fd 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
            transaction.o inode.o file.o tree-defrag.o \
            extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
            extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-           ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
-           compression.o delayed-ref.o
+           export.o tree-log.o acl.o free-space-cache.o zlib.o \
+           compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index cbba000dccbe..603972576f0f 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -351,9 +351,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) | |||
351 | return 0; | 351 | return 0; |
352 | } | 352 | } |
353 | 353 | ||
354 | int btrfs_check_acl(struct inode *inode, int mask) | ||
355 | { | ||
356 | return 0; | ||
357 | } | ||
358 | |||
359 | #endif /* CONFIG_FS_POSIX_ACL */ | 354 | #endif /* CONFIG_FS_POSIX_ACL */ |
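Removing the !CONFIG_FS_POSIX_ACL stub only links if callers no longer need the symbol; presumably the matching header hunk (not shown here) switches to the usual idiom of a NULL permission hook, along the lines of:

    #ifdef CONFIG_FS_POSIX_ACL
    int btrfs_check_acl(struct inode *inode, int mask);
    #else
    #define btrfs_check_acl NULL
    #endif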
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 502c3d61de62..7f88628a1a72 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -294,10 +294,10 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
294 | INIT_LIST_HEAD(&worker->worker_list); | 294 | INIT_LIST_HEAD(&worker->worker_list); |
295 | spin_lock_init(&worker->lock); | 295 | spin_lock_init(&worker->lock); |
296 | atomic_set(&worker->num_pending, 0); | 296 | atomic_set(&worker->num_pending, 0); |
297 | worker->workers = workers; | ||
297 | worker->task = kthread_run(worker_loop, worker, | 298 | worker->task = kthread_run(worker_loop, worker, |
298 | "btrfs-%s-%d", workers->name, | 299 | "btrfs-%s-%d", workers->name, |
299 | workers->num_workers + i); | 300 | workers->num_workers + i); |
300 | worker->workers = workers; | ||
301 | if (IS_ERR(worker->task)) { | 301 | if (IS_ERR(worker->task)) { |
302 | kfree(worker); | 302 | kfree(worker); |
303 | ret = PTR_ERR(worker->task); | 303 | ret = PTR_ERR(worker->task); |
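The reordering here closes a startup race:

    /* annotation (not in the patch): worker_loop() can begin running and
     * dereference worker->workers the moment kthread_run() returns, so
     * the field must be populated before the thread is created -- hence
     * the assignment moving above kthread_run() in this hunk */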
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b30986f00b9d..acb4f3517582 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -72,6 +72,9 @@ struct btrfs_inode { | |||
72 | */ | 72 | */ |
73 | struct list_head ordered_operations; | 73 | struct list_head ordered_operations; |
74 | 74 | ||
75 | /* node for the red-black tree that links inodes in subvolume root */ | ||
76 | struct rb_node rb_node; | ||
77 | |||
75 | /* the space_info for where this inode's data allocations are done */ | 78 | /* the space_info for where this inode's data allocations are done */ |
76 | struct btrfs_space_info *space_info; | 79 | struct btrfs_space_info *space_info; |
77 | 80 | ||
@@ -154,5 +157,4 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) | |||
154 | BTRFS_I(inode)->disk_i_size = size; | 157 | BTRFS_I(inode)->disk_i_size = size; |
155 | } | 158 | } |
156 | 159 | ||
157 | |||
158 | #endif | 160 | #endif |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ab07627084f1..de1e2fd32080 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -123,7 +123,7 @@ static int check_compressed_csum(struct inode *inode, | |||
123 | u32 csum; | 123 | u32 csum; |
124 | u32 *cb_sum = &cb->sums; | 124 | u32 *cb_sum = &cb->sums; |
125 | 125 | ||
126 | if (btrfs_test_flag(inode, NODATASUM)) | 126 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) |
127 | return 0; | 127 | return 0; |
128 | 128 | ||
129 | for (i = 0; i < cb->nr_pages; i++) { | 129 | for (i = 0; i < cb->nr_pages; i++) { |
@@ -670,7 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
670 | */ | 670 | */ |
671 | atomic_inc(&cb->pending_bios); | 671 | atomic_inc(&cb->pending_bios); |
672 | 672 | ||
673 | if (!btrfs_test_flag(inode, NODATASUM)) { | 673 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
674 | btrfs_lookup_bio_sums(root, inode, comp_bio, | 674 | btrfs_lookup_bio_sums(root, inode, comp_bio, |
675 | sums); | 675 | sums); |
676 | } | 676 | } |
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
697 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); | 697 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); |
698 | BUG_ON(ret); | 698 | BUG_ON(ret); |
699 | 699 | ||
700 | if (!btrfs_test_flag(inode, NODATASUM)) | 700 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) |
701 | btrfs_lookup_bio_sums(root, inode, comp_bio, sums); | 701 | btrfs_lookup_bio_sums(root, inode, comp_bio, sums); |
702 | 702 | ||
703 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); | 703 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); |
diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h deleted file mode 100644 index 6e1b3de36700..000000000000 --- a/fs/btrfs/crc32c.h +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #ifndef __BTRFS_CRC32C__ | ||
20 | #define __BTRFS_CRC32C__ | ||
21 | #include <linux/crc32c.h> | ||
22 | |||
23 | /* | ||
24 | * this file used to do more for selecting the HW version of crc32c, | ||
25 | * perhaps it will one day again soon. | ||
26 | */ | ||
27 | #define btrfs_crc32c(seed, data, length) crc32c(seed, data, length) | ||
28 | #endif | ||
29 | |||
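The deleted header was a thin veneer; since the macro expanded directly to crc32c(), callers can presumably include <linux/crc32c.h> and compute the checksum themselves:

    #include <linux/crc32c.h>

    /* same computation the btrfs_crc32c() macro used to wrap */
    u32 csum = crc32c(seed, data, length);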
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fedf8b9f03a2..60a45f3a4e91 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
197 | u32 nritems; | 197 | u32 nritems; |
198 | int ret = 0; | 198 | int ret = 0; |
199 | int level; | 199 | int level; |
200 | struct btrfs_root *new_root; | 200 | struct btrfs_disk_key disk_key; |
201 | |||
202 | new_root = kmalloc(sizeof(*new_root), GFP_NOFS); | ||
203 | if (!new_root) | ||
204 | return -ENOMEM; | ||
205 | |||
206 | memcpy(new_root, root, sizeof(*new_root)); | ||
207 | new_root->root_key.objectid = new_root_objectid; | ||
208 | 201 | ||
209 | WARN_ON(root->ref_cows && trans->transid != | 202 | WARN_ON(root->ref_cows && trans->transid != |
210 | root->fs_info->running_transaction->transid); | 203 | root->fs_info->running_transaction->transid); |
@@ -212,28 +205,37 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
212 | 205 | ||
213 | level = btrfs_header_level(buf); | 206 | level = btrfs_header_level(buf); |
214 | nritems = btrfs_header_nritems(buf); | 207 | nritems = btrfs_header_nritems(buf); |
208 | if (level == 0) | ||
209 | btrfs_item_key(buf, &disk_key, 0); | ||
210 | else | ||
211 | btrfs_node_key(buf, &disk_key, 0); | ||
215 | 212 | ||
216 | cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, | 213 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, |
217 | new_root_objectid, trans->transid, | 214 | new_root_objectid, &disk_key, level, |
218 | level, buf->start, 0); | 215 | buf->start, 0); |
219 | if (IS_ERR(cow)) { | 216 | if (IS_ERR(cow)) |
220 | kfree(new_root); | ||
221 | return PTR_ERR(cow); | 217 | return PTR_ERR(cow); |
222 | } | ||
223 | 218 | ||
224 | copy_extent_buffer(cow, buf, 0, 0, cow->len); | 219 | copy_extent_buffer(cow, buf, 0, 0, cow->len); |
225 | btrfs_set_header_bytenr(cow, cow->start); | 220 | btrfs_set_header_bytenr(cow, cow->start); |
226 | btrfs_set_header_generation(cow, trans->transid); | 221 | btrfs_set_header_generation(cow, trans->transid); |
227 | btrfs_set_header_owner(cow, new_root_objectid); | 222 | btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); |
228 | btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); | 223 | btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | |
224 | BTRFS_HEADER_FLAG_RELOC); | ||
225 | if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
226 | btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); | ||
227 | else | ||
228 | btrfs_set_header_owner(cow, new_root_objectid); | ||
229 | 229 | ||
230 | write_extent_buffer(cow, root->fs_info->fsid, | 230 | write_extent_buffer(cow, root->fs_info->fsid, |
231 | (unsigned long)btrfs_header_fsid(cow), | 231 | (unsigned long)btrfs_header_fsid(cow), |
232 | BTRFS_FSID_SIZE); | 232 | BTRFS_FSID_SIZE); |
233 | 233 | ||
234 | WARN_ON(btrfs_header_generation(buf) > trans->transid); | 234 | WARN_ON(btrfs_header_generation(buf) > trans->transid); |
235 | ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); | 235 | if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) |
236 | kfree(new_root); | 236 | ret = btrfs_inc_ref(trans, root, cow, 1); |
237 | else | ||
238 | ret = btrfs_inc_ref(trans, root, cow, 0); | ||
237 | 239 | ||
238 | if (ret) | 240 | if (ret) |
239 | return ret; | 241 | return ret; |
@@ -244,6 +246,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
244 | } | 246 | } |
245 | 247 | ||
246 | /* | 248 | /* |
249 | * check if the tree block can be shared by multiple trees | ||
250 | */ | ||
251 | int btrfs_block_can_be_shared(struct btrfs_root *root, | ||
252 | struct extent_buffer *buf) | ||
253 | { | ||
254 | /* | ||
255 | * Tree blocks not in reference counted trees and tree roots | ||
256 | * are never shared. If a block was allocated after the last | ||
257 | * snapshot and the block was not allocated by tree relocation, | ||
258 | * we know the block is not shared. | ||
259 | */ | ||
260 | if (root->ref_cows && | ||
261 | buf != root->node && buf != root->commit_root && | ||
262 | (btrfs_header_generation(buf) <= | ||
263 | btrfs_root_last_snapshot(&root->root_item) || | ||
264 | btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) | ||
265 | return 1; | ||
266 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
267 | if (root->ref_cows && | ||
268 | btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) | ||
269 | return 1; | ||
270 | #endif | ||
271 | return 0; | ||
272 | } | ||
273 | |||
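A concrete reading of the generation test above (my annotation, not patch text):

    /*
     * illustrative timeline:
     *   transid 100: snapshot taken    -> last_snapshot becomes 100
     *   block B, generation 95         -> 95 <= 100, B may also be
     *                                     reachable from the snapshot,
     *                                     so it is reported shareable
     *   B is COWed during transid 101  -> the copy has generation 101,
     *                                     101 > 100, the copy is private
     */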
274 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | ||
275 | struct btrfs_root *root, | ||
276 | struct extent_buffer *buf, | ||
277 | struct extent_buffer *cow) | ||
278 | { | ||
279 | u64 refs; | ||
280 | u64 owner; | ||
281 | u64 flags; | ||
282 | u64 new_flags = 0; | ||
283 | int ret; | ||
284 | |||
285 | /* | ||
286 | * Backrefs update rules: | ||
287 | * | ||
288 | * Always use full backrefs for extent pointers in tree block | ||
289 | * allocated by tree relocation. | ||
290 | * | ||
291 | * If a shared tree block is no longer referenced by its owner | ||
292 | * tree (btrfs_header_owner(buf) == root->root_key.objectid), | ||
293 | * use full backrefs for extent pointers in tree block. | ||
294 | * | ||
295 | * If a tree block is being relocated | ||
296 | * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), | ||
297 | * use full backrefs for extent pointers in tree block. | ||
298 | * The reason for this is some operations (such as drop tree) | ||
299 | * are only allowed for blocks that use full backrefs. | ||
300 | */ | ||
301 | |||
302 | if (btrfs_block_can_be_shared(root, buf)) { | ||
303 | ret = btrfs_lookup_extent_info(trans, root, buf->start, | ||
304 | buf->len, &refs, &flags); | ||
305 | BUG_ON(ret); | ||
306 | BUG_ON(refs == 0); | ||
307 | } else { | ||
308 | refs = 1; | ||
309 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || | ||
310 | btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) | ||
311 | flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
312 | else | ||
313 | flags = 0; | ||
314 | } | ||
315 | |||
316 | owner = btrfs_header_owner(buf); | ||
317 | BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && | ||
318 | !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); | ||
319 | |||
320 | if (refs > 1) { | ||
321 | if ((owner == root->root_key.objectid || | ||
322 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && | ||
323 | !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { | ||
324 | ret = btrfs_inc_ref(trans, root, buf, 1); | ||
325 | BUG_ON(ret); | ||
326 | |||
327 | if (root->root_key.objectid == | ||
328 | BTRFS_TREE_RELOC_OBJECTID) { | ||
329 | ret = btrfs_dec_ref(trans, root, buf, 0); | ||
330 | BUG_ON(ret); | ||
331 | ret = btrfs_inc_ref(trans, root, cow, 1); | ||
332 | BUG_ON(ret); | ||
333 | } | ||
334 | new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
335 | } else { | ||
336 | |||
337 | if (root->root_key.objectid == | ||
338 | BTRFS_TREE_RELOC_OBJECTID) | ||
339 | ret = btrfs_inc_ref(trans, root, cow, 1); | ||
340 | else | ||
341 | ret = btrfs_inc_ref(trans, root, cow, 0); | ||
342 | BUG_ON(ret); | ||
343 | } | ||
344 | if (new_flags != 0) { | ||
345 | ret = btrfs_set_disk_extent_flags(trans, root, | ||
346 | buf->start, | ||
347 | buf->len, | ||
348 | new_flags, 0); | ||
349 | BUG_ON(ret); | ||
350 | } | ||
351 | } else { | ||
352 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
353 | if (root->root_key.objectid == | ||
354 | BTRFS_TREE_RELOC_OBJECTID) | ||
355 | ret = btrfs_inc_ref(trans, root, cow, 1); | ||
356 | else | ||
357 | ret = btrfs_inc_ref(trans, root, cow, 0); | ||
358 | BUG_ON(ret); | ||
359 | ret = btrfs_dec_ref(trans, root, buf, 1); | ||
360 | BUG_ON(ret); | ||
361 | } | ||
362 | clean_tree_block(trans, root, buf); | ||
363 | } | ||
364 | return 0; | ||
365 | } | ||
366 | |||
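One worked pass through the rules above (illustrative only, not from the patch):

    /*
     * leaf B is owned by subvolume A and shared with snapshot S
     * (refs == 2, FULL_BACKREF not yet set).  When A COWs B:
     *   - owner matches root->root_key.objectid, so
     *     btrfs_inc_ref(trans, root, buf, 1) adds full backrefs for
     *     everything B points to, and B's extent is flagged
     *     BTRFS_BLOCK_FLAG_FULL_BACKREF via btrfs_set_disk_extent_flags();
     *   - the new copy keeps ordinary owner-based references, so A stays
     *     on the cheap backref scheme while S resolves B through the
     *     full backrefs.
     */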
367 | /* | ||
247 | * does the dirty work in cow of a single block. The parent block (if | 368 | * does the dirty work in cow of a single block. The parent block (if |
248 | * supplied) is updated to point to the new cow copy. The new buffer is marked | 369 | * supplied) is updated to point to the new cow copy. The new buffer is marked |
249 | * dirty and returned locked. If you modify the block it needs to be marked | 370 | * dirty and returned locked. If you modify the block it needs to be marked |
@@ -262,34 +383,39 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
262 | struct extent_buffer **cow_ret, | 383 | struct extent_buffer **cow_ret, |
263 | u64 search_start, u64 empty_size) | 384 | u64 search_start, u64 empty_size) |
264 | { | 385 | { |
265 | u64 parent_start; | 386 | struct btrfs_disk_key disk_key; |
266 | struct extent_buffer *cow; | 387 | struct extent_buffer *cow; |
267 | u32 nritems; | ||
268 | int ret = 0; | ||
269 | int level; | 388 | int level; |
270 | int unlock_orig = 0; | 389 | int unlock_orig = 0; |
390 | u64 parent_start; | ||
271 | 391 | ||
272 | if (*cow_ret == buf) | 392 | if (*cow_ret == buf) |
273 | unlock_orig = 1; | 393 | unlock_orig = 1; |
274 | 394 | ||
275 | btrfs_assert_tree_locked(buf); | 395 | btrfs_assert_tree_locked(buf); |
276 | 396 | ||
277 | if (parent) | ||
278 | parent_start = parent->start; | ||
279 | else | ||
280 | parent_start = 0; | ||
281 | |||
282 | WARN_ON(root->ref_cows && trans->transid != | 397 | WARN_ON(root->ref_cows && trans->transid != |
283 | root->fs_info->running_transaction->transid); | 398 | root->fs_info->running_transaction->transid); |
284 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); | 399 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); |
285 | 400 | ||
286 | level = btrfs_header_level(buf); | 401 | level = btrfs_header_level(buf); |
287 | nritems = btrfs_header_nritems(buf); | ||
288 | 402 | ||
289 | cow = btrfs_alloc_free_block(trans, root, buf->len, | 403 | if (level == 0) |
290 | parent_start, root->root_key.objectid, | 404 | btrfs_item_key(buf, &disk_key, 0); |
291 | trans->transid, level, | 405 | else |
292 | search_start, empty_size); | 406 | btrfs_node_key(buf, &disk_key, 0); |
407 | |||
408 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
409 | if (parent) | ||
410 | parent_start = parent->start; | ||
411 | else | ||
412 | parent_start = 0; | ||
413 | } else | ||
414 | parent_start = 0; | ||
415 | |||
416 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | ||
417 | root->root_key.objectid, &disk_key, | ||
418 | level, search_start, empty_size); | ||
293 | if (IS_ERR(cow)) | 419 | if (IS_ERR(cow)) |
294 | return PTR_ERR(cow); | 420 | return PTR_ERR(cow); |
295 | 421 | ||
@@ -298,83 +424,53 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
298 | copy_extent_buffer(cow, buf, 0, 0, cow->len); | 424 | copy_extent_buffer(cow, buf, 0, 0, cow->len); |
299 | btrfs_set_header_bytenr(cow, cow->start); | 425 | btrfs_set_header_bytenr(cow, cow->start); |
300 | btrfs_set_header_generation(cow, trans->transid); | 426 | btrfs_set_header_generation(cow, trans->transid); |
301 | btrfs_set_header_owner(cow, root->root_key.objectid); | 427 | btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); |
302 | btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); | 428 | btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | |
429 | BTRFS_HEADER_FLAG_RELOC); | ||
430 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
431 | btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); | ||
432 | else | ||
433 | btrfs_set_header_owner(cow, root->root_key.objectid); | ||
303 | 434 | ||
304 | write_extent_buffer(cow, root->fs_info->fsid, | 435 | write_extent_buffer(cow, root->fs_info->fsid, |
305 | (unsigned long)btrfs_header_fsid(cow), | 436 | (unsigned long)btrfs_header_fsid(cow), |
306 | BTRFS_FSID_SIZE); | 437 | BTRFS_FSID_SIZE); |
307 | 438 | ||
308 | WARN_ON(btrfs_header_generation(buf) > trans->transid); | 439 | update_ref_for_cow(trans, root, buf, cow); |
309 | if (btrfs_header_generation(buf) != trans->transid) { | ||
310 | u32 nr_extents; | ||
311 | ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); | ||
312 | if (ret) | ||
313 | return ret; | ||
314 | |||
315 | ret = btrfs_cache_ref(trans, root, buf, nr_extents); | ||
316 | WARN_ON(ret); | ||
317 | } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) { | ||
318 | /* | ||
319 | * There are only two places that can drop reference to | ||
320 | * tree blocks owned by living reloc trees, one is here, | ||
321 | * the other place is btrfs_drop_subtree. In both places, | ||
322 | * we check reference count while tree block is locked. | ||
323 | * Furthermore, if reference count is one, it won't get | ||
324 | * increased by someone else. | ||
325 | */ | ||
326 | u32 refs; | ||
327 | ret = btrfs_lookup_extent_ref(trans, root, buf->start, | ||
328 | buf->len, &refs); | ||
329 | BUG_ON(ret); | ||
330 | if (refs == 1) { | ||
331 | ret = btrfs_update_ref(trans, root, buf, cow, | ||
332 | 0, nritems); | ||
333 | clean_tree_block(trans, root, buf); | ||
334 | } else { | ||
335 | ret = btrfs_inc_ref(trans, root, buf, cow, NULL); | ||
336 | } | ||
337 | BUG_ON(ret); | ||
338 | } else { | ||
339 | ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); | ||
340 | if (ret) | ||
341 | return ret; | ||
342 | clean_tree_block(trans, root, buf); | ||
343 | } | ||
344 | |||
345 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
346 | ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); | ||
347 | WARN_ON(ret); | ||
348 | } | ||
349 | 440 | ||
350 | if (buf == root->node) { | 441 | if (buf == root->node) { |
351 | WARN_ON(parent && parent != buf); | 442 | WARN_ON(parent && parent != buf); |
443 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || | ||
444 | btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) | ||
445 | parent_start = buf->start; | ||
446 | else | ||
447 | parent_start = 0; | ||
352 | 448 | ||
353 | spin_lock(&root->node_lock); | 449 | spin_lock(&root->node_lock); |
354 | root->node = cow; | 450 | root->node = cow; |
355 | extent_buffer_get(cow); | 451 | extent_buffer_get(cow); |
356 | spin_unlock(&root->node_lock); | 452 | spin_unlock(&root->node_lock); |
357 | 453 | ||
358 | if (buf != root->commit_root) { | 454 | btrfs_free_extent(trans, root, buf->start, buf->len, |
359 | btrfs_free_extent(trans, root, buf->start, | 455 | parent_start, root->root_key.objectid, |
360 | buf->len, buf->start, | 456 | level, 0); |
361 | root->root_key.objectid, | ||
362 | btrfs_header_generation(buf), | ||
363 | level, 1); | ||
364 | } | ||
365 | free_extent_buffer(buf); | 457 | free_extent_buffer(buf); |
366 | add_root_to_dirty_list(root); | 458 | add_root_to_dirty_list(root); |
367 | } else { | 459 | } else { |
460 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
461 | parent_start = parent->start; | ||
462 | else | ||
463 | parent_start = 0; | ||
464 | |||
465 | WARN_ON(trans->transid != btrfs_header_generation(parent)); | ||
368 | btrfs_set_node_blockptr(parent, parent_slot, | 466 | btrfs_set_node_blockptr(parent, parent_slot, |
369 | cow->start); | 467 | cow->start); |
370 | WARN_ON(trans->transid == 0); | ||
371 | btrfs_set_node_ptr_generation(parent, parent_slot, | 468 | btrfs_set_node_ptr_generation(parent, parent_slot, |
372 | trans->transid); | 469 | trans->transid); |
373 | btrfs_mark_buffer_dirty(parent); | 470 | btrfs_mark_buffer_dirty(parent); |
374 | WARN_ON(btrfs_header_generation(parent) != trans->transid); | ||
375 | btrfs_free_extent(trans, root, buf->start, buf->len, | 471 | btrfs_free_extent(trans, root, buf->start, buf->len, |
376 | parent_start, btrfs_header_owner(parent), | 472 | parent_start, root->root_key.objectid, |
377 | btrfs_header_generation(parent), level, 1); | 473 | level, 0); |
378 | } | 474 | } |
379 | if (unlock_orig) | 475 | if (unlock_orig) |
380 | btrfs_tree_unlock(buf); | 476 | btrfs_tree_unlock(buf); |
@@ -384,6 +480,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
384 | return 0; | 480 | return 0; |
385 | } | 481 | } |
386 | 482 | ||
483 | static inline int should_cow_block(struct btrfs_trans_handle *trans, | ||
484 | struct btrfs_root *root, | ||
485 | struct extent_buffer *buf) | ||
486 | { | ||
487 | if (btrfs_header_generation(buf) == trans->transid && | ||
488 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && | ||
489 | !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && | ||
490 | btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) | ||
491 | return 0; | ||
492 | return 1; | ||
493 | } | ||
494 | |||
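    /* annotation (mine, not from the patch): a block may be modified in
     * place only when all three conditions hold --
     *   1. it was allocated in the running transaction,
     *   2. it has not been written out (FLAG_WRITTEN clear), and
     *   3. it is not a relocation copy being touched from outside the
     *      reloc tree (FLAG_RELOC only forces COW for other trees) */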
387 | /* | 495 | /* |
388 | * cows a single block, see __btrfs_cow_block for the real work. | 496 | * cows a single block, see __btrfs_cow_block for the real work. |
389 | * This version of it has extra checks so that a block isn't cow'd more than | 497 | * This version of it has extra checks so that a block isn't cow'd more than |
@@ -411,9 +519,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
411 | WARN_ON(1); | 519 | WARN_ON(1); |
412 | } | 520 | } |
413 | 521 | ||
414 | if (btrfs_header_generation(buf) == trans->transid && | 522 | if (!should_cow_block(trans, root, buf)) { |
415 | btrfs_header_owner(buf) == root->root_key.objectid && | ||
416 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
417 | *cow_ret = buf; | 523 | *cow_ret = buf; |
418 | return 0; | 524 | return 0; |
419 | } | 525 | } |
@@ -469,7 +575,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) | |||
469 | /* | 575 | /* |
470 | * same as comp_keys only with two btrfs_key's | 576 | * same as comp_keys only with two btrfs_key's |
471 | */ | 577 | */ |
472 | static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) | 578 | int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) |
473 | { | 579 | { |
474 | if (k1->objectid > k2->objectid) | 580 | if (k1->objectid > k2->objectid) |
475 | return 1; | 581 | return 1; |
@@ -845,6 +951,12 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, | |||
845 | return -1; | 951 | return -1; |
846 | } | 952 | } |
847 | 953 | ||
954 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | ||
955 | int level, int *slot) | ||
956 | { | ||
957 | return bin_search(eb, key, level, slot); | ||
958 | } | ||
959 | |||
848 | /* given a node and slot number, this reads the blocks it points to. The | 960 | /* given a node and slot number, this reads the blocks it points to. The |
849 | * extent buffer is returned with a reference taken (but unlocked). | 961 | * extent buffer is returned with a reference taken (but unlocked). |
850 | * NULL is returned on error. | 962 | * NULL is returned on error. |
@@ -921,13 +1033,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
921 | root->node = child; | 1033 | root->node = child; |
922 | spin_unlock(&root->node_lock); | 1034 | spin_unlock(&root->node_lock); |
923 | 1035 | ||
924 | ret = btrfs_update_extent_ref(trans, root, child->start, | ||
925 | child->len, | ||
926 | mid->start, child->start, | ||
927 | root->root_key.objectid, | ||
928 | trans->transid, level - 1); | ||
929 | BUG_ON(ret); | ||
930 | |||
931 | add_root_to_dirty_list(root); | 1036 | add_root_to_dirty_list(root); |
932 | btrfs_tree_unlock(child); | 1037 | btrfs_tree_unlock(child); |
933 | 1038 | ||
@@ -938,9 +1043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
938 | /* once for the path */ | 1043 | /* once for the path */ |
939 | free_extent_buffer(mid); | 1044 | free_extent_buffer(mid); |
940 | ret = btrfs_free_extent(trans, root, mid->start, mid->len, | 1045 | ret = btrfs_free_extent(trans, root, mid->start, mid->len, |
941 | mid->start, root->root_key.objectid, | 1046 | 0, root->root_key.objectid, level, 1); |
942 | btrfs_header_generation(mid), | ||
943 | level, 1); | ||
944 | /* once for the root ptr */ | 1047 | /* once for the root ptr */ |
945 | free_extent_buffer(mid); | 1048 | free_extent_buffer(mid); |
946 | return ret; | 1049 | return ret; |
@@ -949,8 +1052,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
949 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 1052 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
950 | return 0; | 1053 | return 0; |
951 | 1054 | ||
952 | if (trans->transaction->delayed_refs.flushing && | 1055 | if (btrfs_header_nritems(mid) > 2) |
953 | btrfs_header_nritems(mid) > 2) | ||
954 | return 0; | 1056 | return 0; |
955 | 1057 | ||
956 | if (btrfs_header_nritems(mid) < 2) | 1058 | if (btrfs_header_nritems(mid) < 2) |
@@ -998,7 +1100,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
998 | ret = wret; | 1100 | ret = wret; |
999 | if (btrfs_header_nritems(right) == 0) { | 1101 | if (btrfs_header_nritems(right) == 0) { |
1000 | u64 bytenr = right->start; | 1102 | u64 bytenr = right->start; |
1001 | u64 generation = btrfs_header_generation(parent); | ||
1002 | u32 blocksize = right->len; | 1103 | u32 blocksize = right->len; |
1003 | 1104 | ||
1004 | clean_tree_block(trans, root, right); | 1105 | clean_tree_block(trans, root, right); |
@@ -1010,9 +1111,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1010 | if (wret) | 1111 | if (wret) |
1011 | ret = wret; | 1112 | ret = wret; |
1012 | wret = btrfs_free_extent(trans, root, bytenr, | 1113 | wret = btrfs_free_extent(trans, root, bytenr, |
1013 | blocksize, parent->start, | 1114 | blocksize, 0, |
1014 | btrfs_header_owner(parent), | 1115 | root->root_key.objectid, |
1015 | generation, level, 1); | 1116 | level, 0); |
1016 | if (wret) | 1117 | if (wret) |
1017 | ret = wret; | 1118 | ret = wret; |
1018 | } else { | 1119 | } else { |
@@ -1047,7 +1148,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1047 | } | 1148 | } |
1048 | if (btrfs_header_nritems(mid) == 0) { | 1149 | if (btrfs_header_nritems(mid) == 0) { |
1049 | /* we've managed to empty the middle node, drop it */ | 1150 | /* we've managed to empty the middle node, drop it */ |
1050 | u64 root_gen = btrfs_header_generation(parent); | ||
1051 | u64 bytenr = mid->start; | 1151 | u64 bytenr = mid->start; |
1052 | u32 blocksize = mid->len; | 1152 | u32 blocksize = mid->len; |
1053 | 1153 | ||
@@ -1059,9 +1159,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1059 | if (wret) | 1159 | if (wret) |
1060 | ret = wret; | 1160 | ret = wret; |
1061 | wret = btrfs_free_extent(trans, root, bytenr, blocksize, | 1161 | wret = btrfs_free_extent(trans, root, bytenr, blocksize, |
1062 | parent->start, | 1162 | 0, root->root_key.objectid, |
1063 | btrfs_header_owner(parent), | 1163 | level, 0); |
1064 | root_gen, level, 1); | ||
1065 | if (wret) | 1164 | if (wret) |
1066 | ret = wret; | 1165 | ret = wret; |
1067 | } else { | 1166 | } else { |
@@ -1437,7 +1536,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | |||
1437 | { | 1536 | { |
1438 | int i; | 1537 | int i; |
1439 | 1538 | ||
1440 | if (path->keep_locks || path->lowest_level) | 1539 | if (path->keep_locks) |
1441 | return; | 1540 | return; |
1442 | 1541 | ||
1443 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | 1542 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { |
@@ -1552,7 +1651,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, | |||
1552 | } | 1651 | } |
1553 | b = p->nodes[level]; | 1652 | b = p->nodes[level]; |
1554 | } else if (ins_len < 0 && btrfs_header_nritems(b) < | 1653 | } else if (ins_len < 0 && btrfs_header_nritems(b) < |
1555 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { | 1654 | BTRFS_NODEPTRS_PER_BLOCK(root) / 2) { |
1556 | int sret; | 1655 | int sret; |
1557 | 1656 | ||
1558 | sret = reada_for_balance(root, p, level); | 1657 | sret = reada_for_balance(root, p, level); |
@@ -1614,10 +1713,17 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1614 | lowest_unlock = 2; | 1713 | lowest_unlock = 2; |
1615 | 1714 | ||
1616 | again: | 1715 | again: |
1617 | if (p->skip_locking) | 1716 | if (p->search_commit_root) { |
1618 | b = btrfs_root_node(root); | 1717 | b = root->commit_root; |
1619 | else | 1718 | extent_buffer_get(b); |
1620 | b = btrfs_lock_root_node(root); | 1719 | if (!p->skip_locking) |
1720 | btrfs_tree_lock(b); | ||
1721 | } else { | ||
1722 | if (p->skip_locking) | ||
1723 | b = btrfs_root_node(root); | ||
1724 | else | ||
1725 | b = btrfs_lock_root_node(root); | ||
1726 | } | ||
1621 | 1727 | ||
1622 | while (b) { | 1728 | while (b) { |
1623 | level = btrfs_header_level(b); | 1729 | level = btrfs_header_level(b); |
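The new search_commit_root path gives read-only searches a stable view of the last committed tree. A plausible caller, hedged since the flag's users sit outside this hunk:

    /* hypothetical read-only lookup against the committed tree */
    struct btrfs_path *path = btrfs_alloc_path();
    path->search_commit_root = 1;
    path->skip_locking = 1;         /* the commit root is immutable */
    ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);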
@@ -1638,11 +1744,9 @@ again: | |||
1638 | * then we don't want to set the path blocking, | 1744 | * then we don't want to set the path blocking, |
1639 | * so we test it here | 1745 | * so we test it here |
1640 | */ | 1746 | */ |
1641 | if (btrfs_header_generation(b) == trans->transid && | 1747 | if (!should_cow_block(trans, root, b)) |
1642 | btrfs_header_owner(b) == root->root_key.objectid && | ||
1643 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
1644 | goto cow_done; | 1748 | goto cow_done; |
1645 | } | 1749 | |
1646 | btrfs_set_path_blocking(p); | 1750 | btrfs_set_path_blocking(p); |
1647 | 1751 | ||
1648 | wret = btrfs_cow_block(trans, root, b, | 1752 | wret = btrfs_cow_block(trans, root, b, |
@@ -1764,138 +1868,6 @@ done: | |||
1764 | return ret; | 1868 | return ret; |
1765 | } | 1869 | } |
1766 | 1870 | ||
1767 | int btrfs_merge_path(struct btrfs_trans_handle *trans, | ||
1768 | struct btrfs_root *root, | ||
1769 | struct btrfs_key *node_keys, | ||
1770 | u64 *nodes, int lowest_level) | ||
1771 | { | ||
1772 | struct extent_buffer *eb; | ||
1773 | struct extent_buffer *parent; | ||
1774 | struct btrfs_key key; | ||
1775 | u64 bytenr; | ||
1776 | u64 generation; | ||
1777 | u32 blocksize; | ||
1778 | int level; | ||
1779 | int slot; | ||
1780 | int key_match; | ||
1781 | int ret; | ||
1782 | |||
1783 | eb = btrfs_lock_root_node(root); | ||
1784 | ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb); | ||
1785 | BUG_ON(ret); | ||
1786 | |||
1787 | btrfs_set_lock_blocking(eb); | ||
1788 | |||
1789 | parent = eb; | ||
1790 | while (1) { | ||
1791 | level = btrfs_header_level(parent); | ||
1792 | if (level == 0 || level <= lowest_level) | ||
1793 | break; | ||
1794 | |||
1795 | ret = bin_search(parent, &node_keys[lowest_level], level, | ||
1796 | &slot); | ||
1797 | if (ret && slot > 0) | ||
1798 | slot--; | ||
1799 | |||
1800 | bytenr = btrfs_node_blockptr(parent, slot); | ||
1801 | if (nodes[level - 1] == bytenr) | ||
1802 | break; | ||
1803 | |||
1804 | blocksize = btrfs_level_size(root, level - 1); | ||
1805 | generation = btrfs_node_ptr_generation(parent, slot); | ||
1806 | btrfs_node_key_to_cpu(eb, &key, slot); | ||
1807 | key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); | ||
1808 | |||
1809 | if (generation == trans->transid) { | ||
1810 | eb = read_tree_block(root, bytenr, blocksize, | ||
1811 | generation); | ||
1812 | btrfs_tree_lock(eb); | ||
1813 | btrfs_set_lock_blocking(eb); | ||
1814 | } | ||
1815 | |||
1816 | /* | ||
1817 | * if node keys match and node pointer hasn't been modified | ||
1818 | * in the running transaction, we can merge the path. for | ||
1819 | * blocks owned by reloc trees, the node pointer check is | ||
1820 | * skipped; this is because these blocks are fully controlled | ||
1821 | * by the space balance code and no one else can modify them. | ||
1822 | */ | ||
1823 | if (!nodes[level - 1] || !key_match || | ||
1824 | (generation == trans->transid && | ||
1825 | btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) { | ||
1826 | if (level == 1 || level == lowest_level + 1) { | ||
1827 | if (generation == trans->transid) { | ||
1828 | btrfs_tree_unlock(eb); | ||
1829 | free_extent_buffer(eb); | ||
1830 | } | ||
1831 | break; | ||
1832 | } | ||
1833 | |||
1834 | if (generation != trans->transid) { | ||
1835 | eb = read_tree_block(root, bytenr, blocksize, | ||
1836 | generation); | ||
1837 | btrfs_tree_lock(eb); | ||
1838 | btrfs_set_lock_blocking(eb); | ||
1839 | } | ||
1840 | |||
1841 | ret = btrfs_cow_block(trans, root, eb, parent, slot, | ||
1842 | &eb); | ||
1843 | BUG_ON(ret); | ||
1844 | |||
1845 | if (root->root_key.objectid == | ||
1846 | BTRFS_TREE_RELOC_OBJECTID) { | ||
1847 | if (!nodes[level - 1]) { | ||
1848 | nodes[level - 1] = eb->start; | ||
1849 | memcpy(&node_keys[level - 1], &key, | ||
1850 | sizeof(node_keys[0])); | ||
1851 | } else { | ||
1852 | WARN_ON(1); | ||
1853 | } | ||
1854 | } | ||
1855 | |||
1856 | btrfs_tree_unlock(parent); | ||
1857 | free_extent_buffer(parent); | ||
1858 | parent = eb; | ||
1859 | continue; | ||
1860 | } | ||
1861 | |||
1862 | btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); | ||
1863 | btrfs_set_node_ptr_generation(parent, slot, trans->transid); | ||
1864 | btrfs_mark_buffer_dirty(parent); | ||
1865 | |||
1866 | ret = btrfs_inc_extent_ref(trans, root, | ||
1867 | nodes[level - 1], | ||
1868 | blocksize, parent->start, | ||
1869 | btrfs_header_owner(parent), | ||
1870 | btrfs_header_generation(parent), | ||
1871 | level - 1); | ||
1872 | BUG_ON(ret); | ||
1873 | |||
1874 | /* | ||
1875 | * If the block was created in the running transaction, | ||
1876 | * it's possible this is the last reference to it, so we | ||
1877 | * should drop the subtree. | ||
1878 | */ | ||
1879 | if (generation == trans->transid) { | ||
1880 | ret = btrfs_drop_subtree(trans, root, eb, parent); | ||
1881 | BUG_ON(ret); | ||
1882 | btrfs_tree_unlock(eb); | ||
1883 | free_extent_buffer(eb); | ||
1884 | } else { | ||
1885 | ret = btrfs_free_extent(trans, root, bytenr, | ||
1886 | blocksize, parent->start, | ||
1887 | btrfs_header_owner(parent), | ||
1888 | btrfs_header_generation(parent), | ||
1889 | level - 1, 1); | ||
1890 | BUG_ON(ret); | ||
1891 | } | ||
1892 | break; | ||
1893 | } | ||
1894 | btrfs_tree_unlock(parent); | ||
1895 | free_extent_buffer(parent); | ||
1896 | return 0; | ||
1897 | } | ||
1898 | |||
1899 | /* | 1871 | /* |
1900 | * adjust the pointers going up the tree, starting at level | 1872 | * adjust the pointers going up the tree, starting at level |
1901 | * making sure the right key of each node points to 'key'. | 1873 | * making sure the right key of each node points to 'key'. |
@@ -2021,9 +1993,6 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
2021 | btrfs_mark_buffer_dirty(src); | 1993 | btrfs_mark_buffer_dirty(src); |
2022 | btrfs_mark_buffer_dirty(dst); | 1994 | btrfs_mark_buffer_dirty(dst); |
2023 | 1995 | ||
2024 | ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items); | ||
2025 | BUG_ON(ret); | ||
2026 | |||
2027 | return ret; | 1996 | return ret; |
2028 | } | 1997 | } |
2029 | 1998 | ||
@@ -2083,9 +2052,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
2083 | btrfs_mark_buffer_dirty(src); | 2052 | btrfs_mark_buffer_dirty(src); |
2084 | btrfs_mark_buffer_dirty(dst); | 2053 | btrfs_mark_buffer_dirty(dst); |
2085 | 2054 | ||
2086 | ret = btrfs_update_ref(trans, root, src, dst, 0, push_items); | ||
2087 | BUG_ON(ret); | ||
2088 | |||
2089 | return ret; | 2055 | return ret; |
2090 | } | 2056 | } |
2091 | 2057 | ||
@@ -2105,7 +2071,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2105 | struct extent_buffer *c; | 2071 | struct extent_buffer *c; |
2106 | struct extent_buffer *old; | 2072 | struct extent_buffer *old; |
2107 | struct btrfs_disk_key lower_key; | 2073 | struct btrfs_disk_key lower_key; |
2108 | int ret; | ||
2109 | 2074 | ||
2110 | BUG_ON(path->nodes[level]); | 2075 | BUG_ON(path->nodes[level]); |
2111 | BUG_ON(path->nodes[level-1] != root->node); | 2076 | BUG_ON(path->nodes[level-1] != root->node); |
@@ -2117,16 +2082,17 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2117 | btrfs_node_key(lower, &lower_key, 0); | 2082 | btrfs_node_key(lower, &lower_key, 0); |
2118 | 2083 | ||
2119 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 2084 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2120 | root->root_key.objectid, trans->transid, | 2085 | root->root_key.objectid, &lower_key, |
2121 | level, root->node->start, 0); | 2086 | level, root->node->start, 0); |
2122 | if (IS_ERR(c)) | 2087 | if (IS_ERR(c)) |
2123 | return PTR_ERR(c); | 2088 | return PTR_ERR(c); |
2124 | 2089 | ||
2125 | memset_extent_buffer(c, 0, 0, root->nodesize); | 2090 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); |
2126 | btrfs_set_header_nritems(c, 1); | 2091 | btrfs_set_header_nritems(c, 1); |
2127 | btrfs_set_header_level(c, level); | 2092 | btrfs_set_header_level(c, level); |
2128 | btrfs_set_header_bytenr(c, c->start); | 2093 | btrfs_set_header_bytenr(c, c->start); |
2129 | btrfs_set_header_generation(c, trans->transid); | 2094 | btrfs_set_header_generation(c, trans->transid); |
2095 | btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); | ||
2130 | btrfs_set_header_owner(c, root->root_key.objectid); | 2096 | btrfs_set_header_owner(c, root->root_key.objectid); |
2131 | 2097 | ||
2132 | write_extent_buffer(c, root->fs_info->fsid, | 2098 | write_extent_buffer(c, root->fs_info->fsid, |
@@ -2151,12 +2117,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2151 | root->node = c; | 2117 | root->node = c; |
2152 | spin_unlock(&root->node_lock); | 2118 | spin_unlock(&root->node_lock); |
2153 | 2119 | ||
2154 | ret = btrfs_update_extent_ref(trans, root, lower->start, | ||
2155 | lower->len, lower->start, c->start, | ||
2156 | root->root_key.objectid, | ||
2157 | trans->transid, level - 1); | ||
2158 | BUG_ON(ret); | ||
2159 | |||
2160 | /* the super has an extra ref to root->node */ | 2120 | /* the super has an extra ref to root->node */ |
2161 | free_extent_buffer(old); | 2121 | free_extent_buffer(old); |
2162 | 2122 | ||
@@ -2233,7 +2193,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2233 | ret = insert_new_root(trans, root, path, level + 1); | 2193 | ret = insert_new_root(trans, root, path, level + 1); |
2234 | if (ret) | 2194 | if (ret) |
2235 | return ret; | 2195 | return ret; |
2236 | } else if (!trans->transaction->delayed_refs.flushing) { | 2196 | } else { |
2237 | ret = push_nodes_for_insert(trans, root, path, level); | 2197 | ret = push_nodes_for_insert(trans, root, path, level); |
2238 | c = path->nodes[level]; | 2198 | c = path->nodes[level]; |
2239 | if (!ret && btrfs_header_nritems(c) < | 2199 | if (!ret && btrfs_header_nritems(c) < |
@@ -2244,20 +2204,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2244 | } | 2204 | } |
2245 | 2205 | ||
2246 | c_nritems = btrfs_header_nritems(c); | 2206 | c_nritems = btrfs_header_nritems(c); |
2207 | mid = (c_nritems + 1) / 2; | ||
2208 | btrfs_node_key(c, &disk_key, mid); | ||
2247 | 2209 | ||
2248 | split = btrfs_alloc_free_block(trans, root, root->nodesize, | 2210 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2249 | path->nodes[level + 1]->start, | ||
2250 | root->root_key.objectid, | 2211 | root->root_key.objectid, |
2251 | trans->transid, level, c->start, 0); | 2212 | &disk_key, level, c->start, 0); |
2252 | if (IS_ERR(split)) | 2213 | if (IS_ERR(split)) |
2253 | return PTR_ERR(split); | 2214 | return PTR_ERR(split); |
2254 | 2215 | ||
2255 | btrfs_set_header_flags(split, btrfs_header_flags(c)); | 2216 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); |
2256 | btrfs_set_header_level(split, btrfs_header_level(c)); | 2217 | btrfs_set_header_level(split, btrfs_header_level(c)); |
2257 | btrfs_set_header_bytenr(split, split->start); | 2218 | btrfs_set_header_bytenr(split, split->start); |
2258 | btrfs_set_header_generation(split, trans->transid); | 2219 | btrfs_set_header_generation(split, trans->transid); |
2220 | btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); | ||
2259 | btrfs_set_header_owner(split, root->root_key.objectid); | 2221 | btrfs_set_header_owner(split, root->root_key.objectid); |
2260 | btrfs_set_header_flags(split, 0); | ||
2261 | write_extent_buffer(split, root->fs_info->fsid, | 2222 | write_extent_buffer(split, root->fs_info->fsid, |
2262 | (unsigned long)btrfs_header_fsid(split), | 2223 | (unsigned long)btrfs_header_fsid(split), |
2263 | BTRFS_FSID_SIZE); | 2224 | BTRFS_FSID_SIZE); |
@@ -2265,7 +2226,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2265 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | 2226 | (unsigned long)btrfs_header_chunk_tree_uuid(split), |
2266 | BTRFS_UUID_SIZE); | 2227 | BTRFS_UUID_SIZE); |
2267 | 2228 | ||
2268 | mid = (c_nritems + 1) / 2; | ||
2269 | 2229 | ||
2270 | copy_extent_buffer(split, c, | 2230 | copy_extent_buffer(split, c, |
2271 | btrfs_node_key_ptr_offset(0), | 2231 | btrfs_node_key_ptr_offset(0), |
@@ -2278,16 +2238,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2278 | btrfs_mark_buffer_dirty(c); | 2238 | btrfs_mark_buffer_dirty(c); |
2279 | btrfs_mark_buffer_dirty(split); | 2239 | btrfs_mark_buffer_dirty(split); |
2280 | 2240 | ||
2281 | btrfs_node_key(split, &disk_key, 0); | ||
2282 | wret = insert_ptr(trans, root, path, &disk_key, split->start, | 2241 | wret = insert_ptr(trans, root, path, &disk_key, split->start, |
2283 | path->slots[level + 1] + 1, | 2242 | path->slots[level + 1] + 1, |
2284 | level + 1); | 2243 | level + 1); |
2285 | if (wret) | 2244 | if (wret) |
2286 | ret = wret; | 2245 | ret = wret; |
2287 | 2246 | ||
2288 | ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid); | ||
2289 | BUG_ON(ret); | ||
2290 | |||
2291 | if (path->slots[level] >= mid) { | 2247 | if (path->slots[level] >= mid) { |
2292 | path->slots[level] -= mid; | 2248 | path->slots[level] -= mid; |
2293 | btrfs_tree_unlock(c); | 2249 | btrfs_tree_unlock(c); |
@@ -2360,7 +2316,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2360 | u32 right_nritems; | 2316 | u32 right_nritems; |
2361 | u32 data_end; | 2317 | u32 data_end; |
2362 | u32 this_item_size; | 2318 | u32 this_item_size; |
2363 | int ret; | ||
2364 | 2319 | ||
2365 | if (empty) | 2320 | if (empty) |
2366 | nr = 0; | 2321 | nr = 0; |
@@ -2473,9 +2428,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2473 | btrfs_mark_buffer_dirty(left); | 2428 | btrfs_mark_buffer_dirty(left); |
2474 | btrfs_mark_buffer_dirty(right); | 2429 | btrfs_mark_buffer_dirty(right); |
2475 | 2430 | ||
2476 | ret = btrfs_update_ref(trans, root, left, right, 0, push_items); | ||
2477 | BUG_ON(ret); | ||
2478 | |||
2479 | btrfs_item_key(right, &disk_key, 0); | 2431 | btrfs_item_key(right, &disk_key, 0); |
2480 | btrfs_set_node_key(upper, &disk_key, slot + 1); | 2432 | btrfs_set_node_key(upper, &disk_key, slot + 1); |
2481 | btrfs_mark_buffer_dirty(upper); | 2433 | btrfs_mark_buffer_dirty(upper); |
@@ -2720,10 +2672,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2720 | if (right_nritems) | 2672 | if (right_nritems) |
2721 | btrfs_mark_buffer_dirty(right); | 2673 | btrfs_mark_buffer_dirty(right); |
2722 | 2674 | ||
2723 | ret = btrfs_update_ref(trans, root, right, left, | ||
2724 | old_left_nritems, push_items); | ||
2725 | BUG_ON(ret); | ||
2726 | |||
2727 | btrfs_item_key(right, &disk_key, 0); | 2675 | btrfs_item_key(right, &disk_key, 0); |
2728 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); | 2676 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); |
2729 | if (wret) | 2677 | if (wret) |
@@ -2880,9 +2828,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, | |||
2880 | btrfs_mark_buffer_dirty(l); | 2828 | btrfs_mark_buffer_dirty(l); |
2881 | BUG_ON(path->slots[0] != slot); | 2829 | BUG_ON(path->slots[0] != slot); |
2882 | 2830 | ||
2883 | ret = btrfs_update_ref(trans, root, l, right, 0, nritems); | ||
2884 | BUG_ON(ret); | ||
2885 | |||
2886 | if (mid <= slot) { | 2831 | if (mid <= slot) { |
2887 | btrfs_tree_unlock(path->nodes[0]); | 2832 | btrfs_tree_unlock(path->nodes[0]); |
2888 | free_extent_buffer(path->nodes[0]); | 2833 | free_extent_buffer(path->nodes[0]); |
@@ -2911,6 +2856,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2911 | struct btrfs_path *path, int data_size, | 2856 | struct btrfs_path *path, int data_size, |
2912 | int extend) | 2857 | int extend) |
2913 | { | 2858 | { |
2859 | struct btrfs_disk_key disk_key; | ||
2914 | struct extent_buffer *l; | 2860 | struct extent_buffer *l; |
2915 | u32 nritems; | 2861 | u32 nritems; |
2916 | int mid; | 2862 | int mid; |
@@ -2918,12 +2864,11 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2918 | struct extent_buffer *right; | 2864 | struct extent_buffer *right; |
2919 | int ret = 0; | 2865 | int ret = 0; |
2920 | int wret; | 2866 | int wret; |
2921 | int double_split; | 2867 | int split; |
2922 | int num_doubles = 0; | 2868 | int num_doubles = 0; |
2923 | 2869 | ||
2924 | /* first try to make some room by pushing left and right */ | 2870 | /* first try to make some room by pushing left and right */ |
2925 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY && | 2871 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { |
2926 | !trans->transaction->delayed_refs.flushing) { | ||
2927 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2872 | wret = push_leaf_right(trans, root, path, data_size, 0); |
2928 | if (wret < 0) | 2873 | if (wret < 0) |
2929 | return wret; | 2874 | return wret; |
@@ -2945,16 +2890,53 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2945 | return ret; | 2890 | return ret; |
2946 | } | 2891 | } |
2947 | again: | 2892 | again: |
2948 | double_split = 0; | 2893 | split = 1; |
2949 | l = path->nodes[0]; | 2894 | l = path->nodes[0]; |
2950 | slot = path->slots[0]; | 2895 | slot = path->slots[0]; |
2951 | nritems = btrfs_header_nritems(l); | 2896 | nritems = btrfs_header_nritems(l); |
2952 | mid = (nritems + 1) / 2; | 2897 | mid = (nritems + 1) / 2; |
2953 | 2898 | ||
2954 | right = btrfs_alloc_free_block(trans, root, root->leafsize, | 2899 | if (mid <= slot) { |
2955 | path->nodes[1]->start, | 2900 | if (nritems == 1 || |
2901 | leaf_space_used(l, mid, nritems - mid) + data_size > | ||
2902 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
2903 | if (slot >= nritems) { | ||
2904 | split = 0; | ||
2905 | } else { | ||
2906 | mid = slot; | ||
2907 | if (mid != nritems && | ||
2908 | leaf_space_used(l, mid, nritems - mid) + | ||
2909 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | ||
2910 | split = 2; | ||
2911 | } | ||
2912 | } | ||
2913 | } | ||
2914 | } else { | ||
2915 | if (leaf_space_used(l, 0, mid) + data_size > | ||
2916 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
2917 | if (!extend && data_size && slot == 0) { | ||
2918 | split = 0; | ||
2919 | } else if ((extend || !data_size) && slot == 0) { | ||
2920 | mid = 1; | ||
2921 | } else { | ||
2922 | mid = slot; | ||
2923 | if (mid != nritems && | ||
2924 | leaf_space_used(l, mid, nritems - mid) + | ||
2925 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | ||
2926 | split = 2; | ||
2927 | } | ||
2928 | } | ||
2929 | } | ||
2930 | } | ||
2931 | |||
2932 | if (split == 0) | ||
2933 | btrfs_cpu_key_to_disk(&disk_key, ins_key); | ||
2934 | else | ||
2935 | btrfs_item_key(l, &disk_key, mid); | ||
2936 | |||
2937 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | ||
2956 | root->root_key.objectid, | 2938 | root->root_key.objectid, |
2957 | trans->transid, 0, l->start, 0); | 2939 | &disk_key, 0, l->start, 0); |
2958 | if (IS_ERR(right)) { | 2940 | if (IS_ERR(right)) { |
2959 | BUG_ON(1); | 2941 | BUG_ON(1); |
2960 | return PTR_ERR(right); | 2942 | return PTR_ERR(right); |
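The old boolean double_split becomes a three-way mode. Summarized from the branches above (my annotation, not patch text):

    /*
     *   split == 0: nothing useful fits beside the new item; link in an
     *               empty right leaf and insert directly into it
     *   split == 1: the normal case, copy items [mid, nritems) into the
     *               new right leaf
     *   split == 2: even after cutting at 'slot' one half still cannot
     *               take the item, so loop and split again (the old
     *               double_split behaviour)
     */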
@@ -2963,6 +2945,7 @@ again: | |||
2963 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 2945 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
2964 | btrfs_set_header_bytenr(right, right->start); | 2946 | btrfs_set_header_bytenr(right, right->start); |
2965 | btrfs_set_header_generation(right, trans->transid); | 2947 | btrfs_set_header_generation(right, trans->transid); |
2948 | btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); | ||
2966 | btrfs_set_header_owner(right, root->root_key.objectid); | 2949 | btrfs_set_header_owner(right, root->root_key.objectid); |
2967 | btrfs_set_header_level(right, 0); | 2950 | btrfs_set_header_level(right, 0); |
2968 | write_extent_buffer(right, root->fs_info->fsid, | 2951 | write_extent_buffer(right, root->fs_info->fsid, |
@@ -2973,79 +2956,47 @@ again: | |||
2973 | (unsigned long)btrfs_header_chunk_tree_uuid(right), | 2956 | (unsigned long)btrfs_header_chunk_tree_uuid(right), |
2974 | BTRFS_UUID_SIZE); | 2957 | BTRFS_UUID_SIZE); |
2975 | 2958 | ||
2976 | if (mid <= slot) { | 2959 | if (split == 0) { |
2977 | if (nritems == 1 || | 2960 | if (mid <= slot) { |
2978 | leaf_space_used(l, mid, nritems - mid) + data_size > | 2961 | btrfs_set_header_nritems(right, 0); |
2979 | BTRFS_LEAF_DATA_SIZE(root)) { | 2962 | wret = insert_ptr(trans, root, path, |
2980 | if (slot >= nritems) { | 2963 | &disk_key, right->start, |
2981 | struct btrfs_disk_key disk_key; | 2964 | path->slots[1] + 1, 1); |
2982 | 2965 | if (wret) | |
2983 | btrfs_cpu_key_to_disk(&disk_key, ins_key); | 2966 | ret = wret; |
2984 | btrfs_set_header_nritems(right, 0); | ||
2985 | wret = insert_ptr(trans, root, path, | ||
2986 | &disk_key, right->start, | ||
2987 | path->slots[1] + 1, 1); | ||
2988 | if (wret) | ||
2989 | ret = wret; | ||
2990 | 2967 | ||
2991 | btrfs_tree_unlock(path->nodes[0]); | 2968 | btrfs_tree_unlock(path->nodes[0]); |
2992 | free_extent_buffer(path->nodes[0]); | 2969 | free_extent_buffer(path->nodes[0]); |
2993 | path->nodes[0] = right; | 2970 | path->nodes[0] = right; |
2994 | path->slots[0] = 0; | 2971 | path->slots[0] = 0; |
2995 | path->slots[1] += 1; | 2972 | path->slots[1] += 1; |
2996 | btrfs_mark_buffer_dirty(right); | 2973 | } else { |
2997 | return ret; | 2974 | btrfs_set_header_nritems(right, 0); |
2998 | } | 2975 | wret = insert_ptr(trans, root, path, |
2999 | mid = slot; | 2976 | &disk_key, |
3000 | if (mid != nritems && | 2977 | right->start, |
3001 | leaf_space_used(l, mid, nritems - mid) + | 2978 | path->slots[1], 1); |
3002 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | 2979 | if (wret) |
3003 | double_split = 1; | 2980 | ret = wret; |
3004 | } | 2981 | btrfs_tree_unlock(path->nodes[0]); |
3005 | } | 2982 | free_extent_buffer(path->nodes[0]); |
3006 | } else { | 2983 | path->nodes[0] = right; |
3007 | if (leaf_space_used(l, 0, mid) + data_size > | 2984 | path->slots[0] = 0; |
3008 | BTRFS_LEAF_DATA_SIZE(root)) { | 2985 | if (path->slots[1] == 0) { |
3009 | if (!extend && data_size && slot == 0) { | 2986 | wret = fixup_low_keys(trans, root, |
3010 | struct btrfs_disk_key disk_key; | 2987 | path, &disk_key, 1); |
3011 | |||
3012 | btrfs_cpu_key_to_disk(&disk_key, ins_key); | ||
3013 | btrfs_set_header_nritems(right, 0); | ||
3014 | wret = insert_ptr(trans, root, path, | ||
3015 | &disk_key, | ||
3016 | right->start, | ||
3017 | path->slots[1], 1); | ||
3018 | if (wret) | 2988 | if (wret) |
3019 | ret = wret; | 2989 | ret = wret; |
3020 | btrfs_tree_unlock(path->nodes[0]); | ||
3021 | free_extent_buffer(path->nodes[0]); | ||
3022 | path->nodes[0] = right; | ||
3023 | path->slots[0] = 0; | ||
3024 | if (path->slots[1] == 0) { | ||
3025 | wret = fixup_low_keys(trans, root, | ||
3026 | path, &disk_key, 1); | ||
3027 | if (wret) | ||
3028 | ret = wret; | ||
3029 | } | ||
3030 | btrfs_mark_buffer_dirty(right); | ||
3031 | return ret; | ||
3032 | } else if ((extend || !data_size) && slot == 0) { | ||
3033 | mid = 1; | ||
3034 | } else { | ||
3035 | mid = slot; | ||
3036 | if (mid != nritems && | ||
3037 | leaf_space_used(l, mid, nritems - mid) + | ||
3038 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | ||
3039 | double_split = 1; | ||
3040 | } | ||
3041 | } | 2990 | } |
3042 | } | 2991 | } |
2992 | btrfs_mark_buffer_dirty(right); | ||
2993 | return ret; | ||
3043 | } | 2994 | } |
3044 | 2995 | ||
3045 | ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); | 2996 | ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); |
3046 | BUG_ON(ret); | 2997 | BUG_ON(ret); |
3047 | 2998 | ||
3048 | if (double_split) { | 2999 | if (split == 2) { |
3049 | BUG_ON(num_doubles != 0); | 3000 | BUG_ON(num_doubles != 0); |
3050 | num_doubles++; | 3001 | num_doubles++; |
3051 | goto again; | 3002 | goto again; |
@@ -3447,7 +3398,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans, | |||
3447 | /* figure out how many keys we can insert in here */ | 3398 | /* figure out how many keys we can insert in here */ |
3448 | total_data = data_size[0]; | 3399 | total_data = data_size[0]; |
3449 | for (i = 1; i < nr; i++) { | 3400 | for (i = 1; i < nr; i++) { |
3450 | if (comp_cpu_keys(&found_key, cpu_key + i) <= 0) | 3401 | if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0) |
3451 | break; | 3402 | break; |
3452 | total_data += data_size[i]; | 3403 | total_data += data_size[i]; |
3453 | } | 3404 | } |
@@ -3745,9 +3696,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3745 | 3696 | ||
3746 | /* | 3697 | /* |
3747 | * a helper function to delete the leaf pointed to by path->slots[1] and | 3698 | * a helper function to delete the leaf pointed to by path->slots[1] and |
3748 | * path->nodes[1]. bytenr is the node block pointer, but since the callers | 3699 | * path->nodes[1]. |
3749 | * already know it, it is faster to have them pass it down than to | ||
3750 | * read it out of the node again. | ||
3751 | * | 3700 | * |
3752 | * This deletes the pointer in path->nodes[1] and frees the leaf | 3701 | * This deletes the pointer in path->nodes[1] and frees the leaf |
3753 | * block extent. zero is returned if it all worked out, < 0 otherwise. | 3702 | * block extent. zero is returned if it all worked out, < 0 otherwise. |
@@ -3755,15 +3704,14 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3755 | * The path must have already been setup for deleting the leaf, including | 3704 | * The path must have already been setup for deleting the leaf, including |
3756 | * all the proper balancing. path->nodes[1] must be locked. | 3705 | * all the proper balancing. path->nodes[1] must be locked. |
3757 | */ | 3706 | */ |
3758 | noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | 3707 | static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, |
3759 | struct btrfs_root *root, | 3708 | struct btrfs_root *root, |
3760 | struct btrfs_path *path, u64 bytenr) | 3709 | struct btrfs_path *path, |
3710 | struct extent_buffer *leaf) | ||
3761 | { | 3711 | { |
3762 | int ret; | 3712 | int ret; |
3763 | u64 root_gen = btrfs_header_generation(path->nodes[1]); | ||
3764 | u64 parent_start = path->nodes[1]->start; | ||
3765 | u64 parent_owner = btrfs_header_owner(path->nodes[1]); | ||
3766 | 3713 | ||
3714 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); | ||
3767 | ret = del_ptr(trans, root, path, 1, path->slots[1]); | 3715 | ret = del_ptr(trans, root, path, 1, path->slots[1]); |
3768 | if (ret) | 3716 | if (ret) |
3769 | return ret; | 3717 | return ret; |
@@ -3774,10 +3722,8 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3774 | */ | 3722 | */ |
3775 | btrfs_unlock_up_safe(path, 0); | 3723 | btrfs_unlock_up_safe(path, 0); |
3776 | 3724 | ||
3777 | ret = btrfs_free_extent(trans, root, bytenr, | 3725 | ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, |
3778 | btrfs_level_size(root, 0), | 3726 | 0, root->root_key.objectid, 0, 0); |
3779 | parent_start, parent_owner, | ||
3780 | root_gen, 0, 1); | ||
3781 | return ret; | 3727 | return ret; |
3782 | } | 3728 | } |
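For orientation, the reworked btrfs_del_leaf() above now receives the extent_buffer itself rather than a bare bytenr: the callee derives both the disk start and the length from the buffer, and under the mixed-backref scheme the free is keyed only by the owning root, with no parent start/owner/generation plumbing. A minimal standalone C model of the new calling convention (all names here are illustrative stand-ins, not kernel API):

#include <stdint.h>
#include <stdio.h>

/* toy stand-in for struct extent_buffer */
struct eb_model {
        uint64_t start;     /* disk byte offset of the tree block */
        uint64_t len;       /* block size */
};

/* models btrfs_free_extent(trans, root, leaf->start, leaf->len,
 * 0, root->root_key.objectid, 0, 0) */
static int free_extent_model(uint64_t bytenr, uint64_t num_bytes,
                             uint64_t parent, uint64_t root_objectid)
{
        printf("free %llu+%llu root %llu parent %llu\n",
               (unsigned long long)bytenr, (unsigned long long)num_bytes,
               (unsigned long long)root_objectid,
               (unsigned long long)parent);
        return 0;
}

static int del_leaf_model(struct eb_model *leaf, uint64_t root_objectid)
{
        /* the old signature forced callers to pass bytenr and the
         * callee to recompute the size via btrfs_level_size() */
        return free_extent_model(leaf->start, leaf->len, 0, root_objectid);
}

int main(void)
{
        struct eb_model leaf = { .start = 12582912, .len = 4096 };
        return del_leaf_model(&leaf, 5);   /* 5: illustrative root id */
}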
3783 | /* | 3729 | /* |
@@ -3845,7 +3791,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3845 | if (leaf == root->node) { | 3791 | if (leaf == root->node) { |
3846 | btrfs_set_header_level(leaf, 0); | 3792 | btrfs_set_header_level(leaf, 0); |
3847 | } else { | 3793 | } else { |
3848 | ret = btrfs_del_leaf(trans, root, path, leaf->start); | 3794 | ret = btrfs_del_leaf(trans, root, path, leaf); |
3849 | BUG_ON(ret); | 3795 | BUG_ON(ret); |
3850 | } | 3796 | } |
3851 | } else { | 3797 | } else { |
@@ -3861,8 +3807,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3861 | } | 3807 | } |
3862 | 3808 | ||
3863 | /* delete the leaf if it is mostly empty */ | 3809 | /* delete the leaf if it is mostly empty */ |
3864 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 && | 3810 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) { |
3865 | !trans->transaction->delayed_refs.flushing) { | ||
3866 | /* push_leaf_left fixes the path. | 3811 | /* push_leaf_left fixes the path. |
3867 | * make sure the path still points to our leaf | 3812 | * make sure the path still points to our leaf |
3868 | * for possible call to del_ptr below | 3813 | * for possible call to del_ptr below |
@@ -3884,8 +3829,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3884 | 3829 | ||
3885 | if (btrfs_header_nritems(leaf) == 0) { | 3830 | if (btrfs_header_nritems(leaf) == 0) { |
3886 | path->slots[1] = slot; | 3831 | path->slots[1] = slot; |
3887 | ret = btrfs_del_leaf(trans, root, path, | 3832 | ret = btrfs_del_leaf(trans, root, path, leaf); |
3888 | leaf->start); | ||
3889 | BUG_ON(ret); | 3833 | BUG_ON(ret); |
3890 | free_extent_buffer(leaf); | 3834 | free_extent_buffer(leaf); |
3891 | } else { | 3835 | } else { |
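The same hunk also relaxes the merge heuristic for nearly-empty leaves: the threshold moves from a quarter to half of the leaf data area, and the delayed-ref flushing exception is dropped. A one-function sketch of the new test:

#include <stdbool.h>
#include <stdint.h>

/* sketch of the "delete the leaf if it is mostly empty" check;
 * leaf_data_size corresponds to BTRFS_LEAF_DATA_SIZE(root) */
static bool leaf_is_merge_candidate(uint32_t used, uint32_t leaf_data_size)
{
        return used < leaf_data_size / 2;   /* was: used < size / 4 */
}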
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4414a5d9983a..03441a99ea38 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -45,6 +45,8 @@ struct btrfs_ordered_sum; | |||
45 | 45 | ||
46 | #define BTRFS_MAX_LEVEL 8 | 46 | #define BTRFS_MAX_LEVEL 8 |
47 | 47 | ||
48 | #define BTRFS_COMPAT_EXTENT_TREE_V0 | ||
49 | |||
48 | /* | 50 | /* |
49 | * files bigger than this get some pre-flushing when they are added | 51 | * files bigger than this get some pre-flushing when they are added |
50 | * to the ordered operations list. That way we limit the total | 52 | * to the ordered operations list. That way we limit the total |
@@ -267,7 +269,18 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
267 | } | 269 | } |
268 | 270 | ||
269 | #define BTRFS_FSID_SIZE 16 | 271 | #define BTRFS_FSID_SIZE 16 |
270 | #define BTRFS_HEADER_FLAG_WRITTEN (1 << 0) | 272 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) |
273 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | ||
274 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | ||
275 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | ||
276 | |||
277 | #define BTRFS_BACKREF_REV_MAX 256 | ||
278 | #define BTRFS_BACKREF_REV_SHIFT 56 | ||
279 | #define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \ | ||
280 | BTRFS_BACKREF_REV_SHIFT) | ||
281 | |||
282 | #define BTRFS_OLD_BACKREF_REV 0 | ||
283 | #define BTRFS_MIXED_BACKREF_REV 1 | ||
271 | 284 | ||
272 | /* | 285 | /* |
273 | * every tree block (leaf or node) starts with this header. | 286 | * every tree block (leaf or node) starts with this header. |
@@ -296,7 +309,6 @@ struct btrfs_header { | |||
296 | sizeof(struct btrfs_item) - \ | 309 | sizeof(struct btrfs_item) - \ |
297 | sizeof(struct btrfs_file_extent_item)) | 310 | sizeof(struct btrfs_file_extent_item)) |
298 | 311 | ||
299 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | ||
300 | 312 | ||
301 | /* | 313 | /* |
302 | * this is a very generous portion of the super block, giving us | 314 | * this is a very generous portion of the super block, giving us |
@@ -355,9 +367,12 @@ struct btrfs_super_block { | |||
355 | * Compat flags that we support. If any incompat flags are set other than the | 367 | * Compat flags that we support. If any incompat flags are set other than the |
356 | * ones specified below then we will fail to mount | 368 | * ones specified below then we will fail to mount |
357 | */ | 369 | */ |
358 | #define BTRFS_FEATURE_COMPAT_SUPP 0x0 | 370 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) |
359 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0x0 | 371 | |
360 | #define BTRFS_FEATURE_INCOMPAT_SUPP 0x0 | 372 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
373 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | ||
374 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | ||
375 | BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | ||
361 | 376 | ||
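These SUPP masks feed the usual mount-time gate: a superblock advertising any incompat bit outside BTRFS_FEATURE_INCOMPAT_SUPP must be refused, so only mixed-backref filesystems (or ones with no incompat bits at all) mount. A hedged standalone model of that check; the real test lives in the disk-io open path, which is not part of this hunk:

#include <stdint.h>

#define MODEL_INCOMPAT_MIXED_BACKREF  (1ULL << 0)
#define MODEL_INCOMPAT_SUPP           MODEL_INCOMPAT_MIXED_BACKREF

/* returns 0 if the filesystem may be mounted, -1 if it carries an
 * incompat feature this code does not understand */
static int check_incompat_model(uint64_t sb_incompat_flags)
{
        return (sb_incompat_flags & ~MODEL_INCOMPAT_SUPP) ? -1 : 0;
}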
362 | /* | 377 | /* |
363 | * A leaf is full of items. offset and size tell us where to find | 378 | * A leaf is full of items. offset and size tell us where to find |
@@ -421,23 +436,65 @@ struct btrfs_path { | |||
421 | unsigned int keep_locks:1; | 436 | unsigned int keep_locks:1; |
422 | unsigned int skip_locking:1; | 437 | unsigned int skip_locking:1; |
423 | unsigned int leave_spinning:1; | 438 | unsigned int leave_spinning:1; |
439 | unsigned int search_commit_root:1; | ||
424 | }; | 440 | }; |
425 | 441 | ||
426 | /* | 442 | /* |
427 | * items in the extent btree are used to record the objectid of the | 443 | * items in the extent btree are used to record the objectid of the |
428 | * owner of the block and the number of references | 444 | * owner of the block and the number of references |
429 | */ | 445 | */ |
446 | |||
430 | struct btrfs_extent_item { | 447 | struct btrfs_extent_item { |
448 | __le64 refs; | ||
449 | __le64 generation; | ||
450 | __le64 flags; | ||
451 | } __attribute__ ((__packed__)); | ||
452 | |||
453 | struct btrfs_extent_item_v0 { | ||
431 | __le32 refs; | 454 | __le32 refs; |
432 | } __attribute__ ((__packed__)); | 455 | } __attribute__ ((__packed__)); |
433 | 456 | ||
434 | struct btrfs_extent_ref { | 457 | #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \ |
458 | sizeof(struct btrfs_item)) | ||
459 | |||
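BTRFS_MAX_EXTENT_ITEM_SIZE caps one extent item, inline backrefs included, at a sixteenth of the leaf data area minus the item header. As a worked example, assuming a 4KiB leaf, a 101-byte packed btrfs_header and a 25-byte btrfs_item (on-disk sizes taken as assumptions here), the cap is ((4096 - 101) >> 4) - 25 = 249 - 25 = 224 bytes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t leaf_data = 4096 - 101;            /* BTRFS_LEAF_DATA_SIZE */
        uint32_t max_item  = (leaf_data >> 4) - 25; /* the macro above */
        printf("max extent item size: %u\n", max_item);  /* prints 224 */
        return 0;
}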
460 | #define BTRFS_EXTENT_FLAG_DATA (1ULL << 0) | ||
461 | #define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1) | ||
462 | |||
463 | /* following flags only apply to tree blocks */ | ||
464 | |||
465 | /* use full backrefs for extent pointers in the block */ | ||
466 | #define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) | ||
467 | |||
468 | struct btrfs_tree_block_info { | ||
469 | struct btrfs_disk_key key; | ||
470 | u8 level; | ||
471 | } __attribute__ ((__packed__)); | ||
472 | |||
473 | struct btrfs_extent_data_ref { | ||
474 | __le64 root; | ||
475 | __le64 objectid; | ||
476 | __le64 offset; | ||
477 | __le32 count; | ||
478 | } __attribute__ ((__packed__)); | ||
479 | |||
480 | struct btrfs_shared_data_ref { | ||
481 | __le32 count; | ||
482 | } __attribute__ ((__packed__)); | ||
483 | |||
484 | struct btrfs_extent_inline_ref { | ||
485 | u8 type; | ||
486 | u64 offset; | ||
487 | } __attribute__ ((__packed__)); | ||
488 | |||
489 | /* old style backrefs item */ | ||
490 | struct btrfs_extent_ref_v0 { | ||
435 | __le64 root; | 491 | __le64 root; |
436 | __le64 generation; | 492 | __le64 generation; |
437 | __le64 objectid; | 493 | __le64 objectid; |
438 | __le32 num_refs; | 494 | __le32 count; |
439 | } __attribute__ ((__packed__)); | 495 | } __attribute__ ((__packed__)); |
440 | 496 | ||
497 | |||
441 | /* dev extents record free space on individual devices. The owner | 498 | /* dev extents record free space on individual devices. The owner |
442 | * field points back to the chunk allocation mapping tree that allocated | 499 | * field points back to the chunk allocation mapping tree that allocated |
443 | * the extent. The chunk tree uuid field is a way to double check the owner | 500 | * the extent. The chunk tree uuid field is a way to double check the owner |
@@ -695,12 +752,7 @@ struct btrfs_block_group_cache { | |||
695 | struct list_head cluster_list; | 752 | struct list_head cluster_list; |
696 | }; | 753 | }; |
697 | 754 | ||
698 | struct btrfs_leaf_ref_tree { | 755 | struct reloc_control; |
699 | struct rb_root root; | ||
700 | struct list_head list; | ||
701 | spinlock_t lock; | ||
702 | }; | ||
703 | |||
704 | struct btrfs_device; | 756 | struct btrfs_device; |
705 | struct btrfs_fs_devices; | 757 | struct btrfs_fs_devices; |
706 | struct btrfs_fs_info { | 758 | struct btrfs_fs_info { |
@@ -831,18 +883,11 @@ struct btrfs_fs_info { | |||
831 | struct task_struct *cleaner_kthread; | 883 | struct task_struct *cleaner_kthread; |
832 | int thread_pool_size; | 884 | int thread_pool_size; |
833 | 885 | ||
834 | /* tree relocation relocated fields */ | ||
835 | struct list_head dead_reloc_roots; | ||
836 | struct btrfs_leaf_ref_tree reloc_ref_tree; | ||
837 | struct btrfs_leaf_ref_tree shared_ref_tree; | ||
838 | |||
839 | struct kobject super_kobj; | 886 | struct kobject super_kobj; |
840 | struct completion kobj_unregister; | 887 | struct completion kobj_unregister; |
841 | int do_barriers; | 888 | int do_barriers; |
842 | int closing; | 889 | int closing; |
843 | int log_root_recovering; | 890 | int log_root_recovering; |
844 | atomic_t throttles; | ||
845 | atomic_t throttle_gen; | ||
846 | 891 | ||
847 | u64 total_pinned; | 892 | u64 total_pinned; |
848 | 893 | ||
@@ -861,6 +906,8 @@ struct btrfs_fs_info { | |||
861 | */ | 906 | */ |
862 | struct list_head space_info; | 907 | struct list_head space_info; |
863 | 908 | ||
909 | struct reloc_control *reloc_ctl; | ||
910 | |||
864 | spinlock_t delalloc_lock; | 911 | spinlock_t delalloc_lock; |
865 | spinlock_t new_trans_lock; | 912 | spinlock_t new_trans_lock; |
866 | u64 delalloc_bytes; | 913 | u64 delalloc_bytes; |
@@ -891,7 +938,6 @@ struct btrfs_fs_info { | |||
891 | * in ram representation of the tree. extent_root is used for all allocations | 938 | * in ram representation of the tree. extent_root is used for all allocations |
892 | * and for the extent tree extent_root root. | 939 | * and for the extent tree extent_root root. |
893 | */ | 940 | */ |
894 | struct btrfs_dirty_root; | ||
895 | struct btrfs_root { | 941 | struct btrfs_root { |
896 | struct extent_buffer *node; | 942 | struct extent_buffer *node; |
897 | 943 | ||
@@ -899,9 +945,6 @@ struct btrfs_root { | |||
899 | spinlock_t node_lock; | 945 | spinlock_t node_lock; |
900 | 946 | ||
901 | struct extent_buffer *commit_root; | 947 | struct extent_buffer *commit_root; |
902 | struct btrfs_leaf_ref_tree *ref_tree; | ||
903 | struct btrfs_leaf_ref_tree ref_tree_struct; | ||
904 | struct btrfs_dirty_root *dirty_root; | ||
905 | struct btrfs_root *log_root; | 948 | struct btrfs_root *log_root; |
906 | struct btrfs_root *reloc_root; | 949 | struct btrfs_root *reloc_root; |
907 | 950 | ||
@@ -952,10 +995,15 @@ struct btrfs_root { | |||
952 | /* the dirty list is only used by non-reference counted roots */ | 995 | /* the dirty list is only used by non-reference counted roots */ |
953 | struct list_head dirty_list; | 996 | struct list_head dirty_list; |
954 | 997 | ||
998 | struct list_head root_list; | ||
999 | |||
955 | spinlock_t list_lock; | 1000 | spinlock_t list_lock; |
956 | struct list_head dead_list; | ||
957 | struct list_head orphan_list; | 1001 | struct list_head orphan_list; |
958 | 1002 | ||
1003 | spinlock_t inode_lock; | ||
1004 | /* red-black tree that keeps track of in-memory inodes */ | ||
1005 | struct rb_root inode_tree; | ||
1006 | |||
959 | /* | 1007 | /* |
960 | * right now this just gets used so that a root has its own devid | 1008 | * right now this just gets used so that a root has its own devid |
961 | * for stat. It may be used for more later | 1009 | * for stat. It may be used for more later |
@@ -1017,7 +1065,16 @@ struct btrfs_root { | |||
1017 | * are used, and how many references there are to each block | 1065 | * are used, and how many references there are to each block |
1018 | */ | 1066 | */ |
1019 | #define BTRFS_EXTENT_ITEM_KEY 168 | 1067 | #define BTRFS_EXTENT_ITEM_KEY 168 |
1020 | #define BTRFS_EXTENT_REF_KEY 180 | 1068 | |
1069 | #define BTRFS_TREE_BLOCK_REF_KEY 176 | ||
1070 | |||
1071 | #define BTRFS_EXTENT_DATA_REF_KEY 178 | ||
1072 | |||
1073 | #define BTRFS_EXTENT_REF_V0_KEY 180 | ||
1074 | |||
1075 | #define BTRFS_SHARED_BLOCK_REF_KEY 182 | ||
1076 | |||
1077 | #define BTRFS_SHARED_DATA_REF_KEY 184 | ||
1021 | 1078 | ||
1022 | /* | 1079 | /* |
1023 | * block groups give us hints into the extent allocation trees. Which | 1080 | * block groups give us hints into the extent allocation trees. Which |
@@ -1043,6 +1100,8 @@ struct btrfs_root { | |||
1043 | #define BTRFS_MOUNT_COMPRESS (1 << 5) | 1100 | #define BTRFS_MOUNT_COMPRESS (1 << 5) |
1044 | #define BTRFS_MOUNT_NOTREELOG (1 << 6) | 1101 | #define BTRFS_MOUNT_NOTREELOG (1 << 6) |
1045 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) | 1102 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) |
1103 | #define BTRFS_MOUNT_SSD_SPREAD (1 << 8) | ||
1104 | #define BTRFS_MOUNT_NOSSD (1 << 9) | ||
1046 | 1105 | ||
1047 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1106 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1048 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1107 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -1056,12 +1115,14 @@ struct btrfs_root { | |||
1056 | #define BTRFS_INODE_READONLY (1 << 2) | 1115 | #define BTRFS_INODE_READONLY (1 << 2) |
1057 | #define BTRFS_INODE_NOCOMPRESS (1 << 3) | 1116 | #define BTRFS_INODE_NOCOMPRESS (1 << 3) |
1058 | #define BTRFS_INODE_PREALLOC (1 << 4) | 1117 | #define BTRFS_INODE_PREALLOC (1 << 4) |
1059 | #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ | 1118 | #define BTRFS_INODE_SYNC (1 << 5) |
1060 | ~BTRFS_INODE_##flag) | 1119 | #define BTRFS_INODE_IMMUTABLE (1 << 6) |
1061 | #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ | 1120 | #define BTRFS_INODE_APPEND (1 << 7) |
1062 | BTRFS_INODE_##flag) | 1121 | #define BTRFS_INODE_NODUMP (1 << 8) |
1063 | #define btrfs_test_flag(inode, flag) (BTRFS_I(inode)->flags & \ | 1122 | #define BTRFS_INODE_NOATIME (1 << 9) |
1064 | BTRFS_INODE_##flag) | 1123 | #define BTRFS_INODE_DIRSYNC (1 << 10) |
1124 | |||
1125 | |||
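The per-inode flag macros give way to plain bits; the new SYNC/IMMUTABLE/APPEND/NODUMP/NOATIME/DIRSYNC values mirror the generic VFS inode flags and back the btrfs_update_iflags()/btrfs_inherit_iflags() helpers declared further down in this diff. A rough standalone model of the projection such a helper performs (VFS constants replaced by stand-ins; NODUMP has no VFS counterpart and is omitted):

#include <stdint.h>

#define M_SYNC       (1u << 0)   /* stand-ins for S_SYNC, S_IMMUTABLE, ... */
#define M_IMMUTABLE  (1u << 1)
#define M_APPEND     (1u << 2)
#define M_NOATIME    (1u << 3)
#define M_DIRSYNC    (1u << 4)

/* project on-disk btrfs inode flag bits onto an in-core flag word,
 * in the spirit of btrfs_update_iflags() */
static uint32_t project_iflags(uint32_t bflags)
{
        uint32_t vfs = 0;

        if (bflags & (1u << 5))  vfs |= M_SYNC;       /* BTRFS_INODE_SYNC */
        if (bflags & (1u << 6))  vfs |= M_IMMUTABLE;  /* ..._IMMUTABLE */
        if (bflags & (1u << 7))  vfs |= M_APPEND;     /* ..._APPEND */
        if (bflags & (1u << 9))  vfs |= M_NOATIME;    /* ..._NOATIME */
        if (bflags & (1u << 10)) vfs |= M_DIRSYNC;    /* ..._DIRSYNC */
        return vfs;
}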
1065 | /* some macros to generate set/get funcs for the struct fields. This | 1126 | /* some macros to generate set/get funcs for the struct fields. This |
1066 | * assumes there is a lefoo_to_cpu for every type, so let's make a simple | 1127 | * assumes there is a lefoo_to_cpu for every type, so let's make a simple |
1067 | * one for u8: | 1128 | * one for u8: |
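As a reading aid: an invocation like BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64) below generates a btrfs_extent_refs()/btrfs_set_extent_refs() accessor pair. The generated kernel code walks the extent_buffer page array with lefoo_to_cpu conversion; the flat-buffer model here only shows the shape of those accessors, assuming a little-endian host:

#include <stdint.h>
#include <string.h>

/* simplified: treat the tree block as one flat byte array, so the
 * le64 conversion degenerates to a no-op on a little-endian host */
static uint64_t model_get_u64(const uint8_t *block, unsigned long off)
{
        uint64_t v;
        memcpy(&v, block + off, sizeof(v));
        return v;                     /* real code: le64_to_cpu(v) */
}

static void model_set_u64(uint8_t *block, unsigned long off, uint64_t v)
{
        memcpy(block + off, &v, sizeof(v));  /* real: cpu_to_le64() first */
}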
@@ -1317,24 +1378,67 @@ static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) | |||
1317 | return (u8 *)((unsigned long)dev + ptr); | 1378 | return (u8 *)((unsigned long)dev + ptr); |
1318 | } | 1379 | } |
1319 | 1380 | ||
1320 | /* struct btrfs_extent_ref */ | 1381 | BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); |
1321 | BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); | 1382 | BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item, |
1322 | BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); | 1383 | generation, 64); |
1323 | BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); | 1384 | BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64); |
1324 | BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32); | ||
1325 | 1385 | ||
1326 | BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); | 1386 | BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32); |
1327 | BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, | 1387 | |
1328 | generation, 64); | 1388 | |
1329 | BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, | 1389 | BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); |
1330 | objectid, 64); | 1390 | |
1331 | BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, | 1391 | static inline void btrfs_tree_block_key(struct extent_buffer *eb, |
1332 | num_refs, 32); | 1392 | struct btrfs_tree_block_info *item, |
1393 | struct btrfs_disk_key *key) | ||
1394 | { | ||
1395 | read_eb_member(eb, item, struct btrfs_tree_block_info, key, key); | ||
1396 | } | ||
1397 | |||
1398 | static inline void btrfs_set_tree_block_key(struct extent_buffer *eb, | ||
1399 | struct btrfs_tree_block_info *item, | ||
1400 | struct btrfs_disk_key *key) | ||
1401 | { | ||
1402 | write_eb_member(eb, item, struct btrfs_tree_block_info, key, key); | ||
1403 | } | ||
1404 | |||
1405 | BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref, | ||
1406 | root, 64); | ||
1407 | BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref, | ||
1408 | objectid, 64); | ||
1409 | BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref, | ||
1410 | offset, 64); | ||
1411 | BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref, | ||
1412 | count, 32); | ||
1413 | |||
1414 | BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, | ||
1415 | count, 32); | ||
1333 | 1416 | ||
1334 | /* struct btrfs_extent_item */ | 1417 | BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref, |
1335 | BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); | 1418 | type, 8); |
1336 | BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, | 1419 | BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref, |
1337 | refs, 32); | 1420 | offset, 64); |
1421 | |||
1422 | static inline u32 btrfs_extent_inline_ref_size(int type) | ||
1423 | { | ||
1424 | if (type == BTRFS_TREE_BLOCK_REF_KEY || | ||
1425 | type == BTRFS_SHARED_BLOCK_REF_KEY) | ||
1426 | return sizeof(struct btrfs_extent_inline_ref); | ||
1427 | if (type == BTRFS_SHARED_DATA_REF_KEY) | ||
1428 | return sizeof(struct btrfs_shared_data_ref) + | ||
1429 | sizeof(struct btrfs_extent_inline_ref); | ||
1430 | if (type == BTRFS_EXTENT_DATA_REF_KEY) | ||
1431 | return sizeof(struct btrfs_extent_data_ref) + | ||
1432 | offsetof(struct btrfs_extent_inline_ref, offset); | ||
1433 | BUG(); | ||
1434 | return 0; | ||
1435 | } | ||
1436 | |||
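btrfs_extent_inline_ref_size() encodes the three inline layouts: block refs are just the 9-byte packed (type, offset) header; a shared data ref appends a 4-byte count after that header; an extent data ref reuses the header's offset slot, so only the leading type byte precedes the 28-byte btrfs_extent_data_ref. A standalone check with packed mirror structs:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct inline_ref { uint8_t type; uint64_t offset; } __attribute__((__packed__));
struct data_ref   { uint64_t root, objectid, offset; uint32_t count; } __attribute__((__packed__));
struct shared_ref { uint32_t count; } __attribute__((__packed__));

int main(void)
{
        printf("block ref:       %zu\n", sizeof(struct inline_ref));   /* 9 */
        printf("shared data ref: %zu\n", sizeof(struct shared_ref) +
                                         sizeof(struct inline_ref));   /* 13 */
        printf("data ref:        %zu\n", sizeof(struct data_ref) +
                           offsetof(struct inline_ref, offset));       /* 29 */
        return 0;
}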
1437 | BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64); | ||
1438 | BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0, | ||
1439 | generation, 64); | ||
1440 | BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64); | ||
1441 | BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32); | ||
1338 | 1442 | ||
1339 | /* struct btrfs_node */ | 1443 | /* struct btrfs_node */ |
1340 | BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); | 1444 | BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); |
@@ -1558,6 +1662,21 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) | |||
1558 | return (flags & flag) == flag; | 1662 | return (flags & flag) == flag; |
1559 | } | 1663 | } |
1560 | 1664 | ||
1665 | static inline int btrfs_header_backref_rev(struct extent_buffer *eb) | ||
1666 | { | ||
1667 | u64 flags = btrfs_header_flags(eb); | ||
1668 | return flags >> BTRFS_BACKREF_REV_SHIFT; | ||
1669 | } | ||
1670 | |||
1671 | static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, | ||
1672 | int rev) | ||
1673 | { | ||
1674 | u64 flags = btrfs_header_flags(eb); | ||
1675 | flags &= ~BTRFS_BACKREF_REV_MASK; | ||
1676 | flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT; | ||
1677 | btrfs_set_header_flags(eb, flags); | ||
1678 | } | ||
1679 | |||
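The two helpers above keep the backref revision in the top eight bits of the 64-bit header flags word, leaving WRITTEN/RELOC and the rest of the flag space untouched. A self-contained model of the packing:

#include <assert.h>
#include <stdint.h>

#define REV_SHIFT 56
#define REV_MASK  (((uint64_t)256 - 1) << REV_SHIFT)   /* top 8 bits */

static int get_rev(uint64_t flags)
{
        return (int)(flags >> REV_SHIFT);
}

static uint64_t set_rev(uint64_t flags, int rev)
{
        flags &= ~REV_MASK;
        flags |= (uint64_t)rev << REV_SHIFT;
        return flags;
}

int main(void)
{
        uint64_t flags = 0x3;             /* WRITTEN | RELOC */
        flags = set_rev(flags, 1);        /* BTRFS_MIXED_BACKREF_REV */
        assert(get_rev(flags) == 1 && (flags & 0x3) == 0x3);
        return 0;
}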
1561 | static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) | 1680 | static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) |
1562 | { | 1681 | { |
1563 | unsigned long ptr = offsetof(struct btrfs_header, fsid); | 1682 | unsigned long ptr = offsetof(struct btrfs_header, fsid); |
@@ -1790,39 +1909,32 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
1790 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1909 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
1791 | struct btrfs_root *root, struct extent_buffer *leaf); | 1910 | struct btrfs_root *root, struct extent_buffer *leaf); |
1792 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1911 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
1793 | struct btrfs_root *root, u64 objectid, u64 bytenr); | 1912 | struct btrfs_root *root, |
1913 | u64 objectid, u64 offset, u64 bytenr); | ||
1794 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); | 1914 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); |
1795 | struct btrfs_block_group_cache *btrfs_lookup_block_group( | 1915 | struct btrfs_block_group_cache *btrfs_lookup_block_group( |
1796 | struct btrfs_fs_info *info, | 1916 | struct btrfs_fs_info *info, |
1797 | u64 bytenr); | 1917 | u64 bytenr); |
1918 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | ||
1798 | u64 btrfs_find_block_group(struct btrfs_root *root, | 1919 | u64 btrfs_find_block_group(struct btrfs_root *root, |
1799 | u64 search_start, u64 search_hint, int owner); | 1920 | u64 search_start, u64 search_hint, int owner); |
1800 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 1921 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, |
1801 | struct btrfs_root *root, | 1922 | struct btrfs_root *root, u32 blocksize, |
1802 | u32 blocksize, u64 parent, | 1923 | u64 parent, u64 root_objectid, |
1803 | u64 root_objectid, | 1924 | struct btrfs_disk_key *key, int level, |
1804 | u64 ref_generation, | 1925 | u64 hint, u64 empty_size); |
1805 | int level, | ||
1806 | u64 hint, | ||
1807 | u64 empty_size); | ||
1808 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 1926 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
1809 | struct btrfs_root *root, | 1927 | struct btrfs_root *root, |
1810 | u64 bytenr, u32 blocksize, | 1928 | u64 bytenr, u32 blocksize, |
1811 | int level); | 1929 | int level); |
1812 | int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | 1930 | int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
1813 | struct btrfs_root *root, | 1931 | struct btrfs_root *root, |
1814 | u64 num_bytes, u64 parent, u64 min_bytes, | 1932 | u64 root_objectid, u64 owner, |
1815 | u64 root_objectid, u64 ref_generation, | 1933 | u64 offset, struct btrfs_key *ins); |
1816 | u64 owner, u64 empty_size, u64 hint_byte, | 1934 | int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, |
1817 | u64 search_end, struct btrfs_key *ins, u64 data); | 1935 | struct btrfs_root *root, |
1818 | int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | 1936 | u64 root_objectid, u64 owner, u64 offset, |
1819 | struct btrfs_root *root, u64 parent, | 1937 | struct btrfs_key *ins); |
1820 | u64 root_objectid, u64 ref_generation, | ||
1821 | u64 owner, struct btrfs_key *ins); | ||
1822 | int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | ||
1823 | struct btrfs_root *root, u64 parent, | ||
1824 | u64 root_objectid, u64 ref_generation, | ||
1825 | u64 owner, struct btrfs_key *ins); | ||
1826 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | 1938 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, |
1827 | struct btrfs_root *root, | 1939 | struct btrfs_root *root, |
1828 | u64 num_bytes, u64 min_alloc_size, | 1940 | u64 num_bytes, u64 min_alloc_size, |
@@ -1830,18 +1942,18 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
1830 | u64 search_end, struct btrfs_key *ins, | 1942 | u64 search_end, struct btrfs_key *ins, |
1831 | u64 data); | 1943 | u64 data); |
1832 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1944 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1833 | struct extent_buffer *orig_buf, struct extent_buffer *buf, | 1945 | struct extent_buffer *buf, int full_backref); |
1834 | u32 *nr_extents); | 1946 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1835 | int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1947 | struct extent_buffer *buf, int full_backref); |
1836 | struct extent_buffer *buf, u32 nr_extents); | 1948 | int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, |
1837 | int btrfs_update_ref(struct btrfs_trans_handle *trans, | 1949 | struct btrfs_root *root, |
1838 | struct btrfs_root *root, struct extent_buffer *orig_buf, | 1950 | u64 bytenr, u64 num_bytes, u64 flags, |
1839 | struct extent_buffer *buf, int start_slot, int nr); | 1951 | int is_data); |
1840 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 1952 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
1841 | struct btrfs_root *root, | 1953 | struct btrfs_root *root, |
1842 | u64 bytenr, u64 num_bytes, u64 parent, | 1954 | u64 bytenr, u64 num_bytes, u64 parent, |
1843 | u64 root_objectid, u64 ref_generation, | 1955 | u64 root_objectid, u64 owner, u64 offset); |
1844 | u64 owner_objectid, int pin); | 1956 | |
1845 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 1957 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
1846 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 1958 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
1847 | struct btrfs_root *root, | 1959 | struct btrfs_root *root, |
@@ -1849,13 +1961,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
1849 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1961 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
1850 | struct btrfs_root *root, | 1962 | struct btrfs_root *root, |
1851 | u64 bytenr, u64 num_bytes, u64 parent, | 1963 | u64 bytenr, u64 num_bytes, u64 parent, |
1852 | u64 root_objectid, u64 ref_generation, | 1964 | u64 root_objectid, u64 owner, u64 offset); |
1853 | u64 owner_objectid); | 1965 | |
1854 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | ||
1855 | struct btrfs_root *root, u64 bytenr, u64 num_bytes, | ||
1856 | u64 orig_parent, u64 parent, | ||
1857 | u64 root_objectid, u64 ref_generation, | ||
1858 | u64 owner_objectid); | ||
1859 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 1966 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
1860 | struct btrfs_root *root); | 1967 | struct btrfs_root *root); |
1861 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); | 1968 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); |
@@ -1867,16 +1974,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
1867 | u64 size); | 1974 | u64 size); |
1868 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 1975 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
1869 | struct btrfs_root *root, u64 group_start); | 1976 | struct btrfs_root *root, u64 group_start); |
1870 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); | 1977 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, |
1871 | int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, | 1978 | struct btrfs_block_group_cache *group); |
1872 | struct btrfs_root *root); | 1979 | |
1873 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); | ||
1874 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | ||
1875 | struct btrfs_root *root, | ||
1876 | struct extent_buffer *buf, u64 orig_start); | ||
1877 | int btrfs_add_dead_reloc_root(struct btrfs_root *root); | ||
1878 | int btrfs_cleanup_reloc_trees(struct btrfs_root *root); | ||
1879 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | ||
1880 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 1980 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
1881 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); | 1981 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); |
1882 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 1982 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
@@ -1891,13 +1991,12 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | |||
1891 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 1991 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, |
1892 | u64 bytes); | 1992 | u64 bytes); |
1893 | /* ctree.c */ | 1993 | /* ctree.c */ |
1994 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | ||
1995 | int level, int *slot); | ||
1996 | int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2); | ||
1894 | int btrfs_previous_item(struct btrfs_root *root, | 1997 | int btrfs_previous_item(struct btrfs_root *root, |
1895 | struct btrfs_path *path, u64 min_objectid, | 1998 | struct btrfs_path *path, u64 min_objectid, |
1896 | int type); | 1999 | int type); |
1897 | int btrfs_merge_path(struct btrfs_trans_handle *trans, | ||
1898 | struct btrfs_root *root, | ||
1899 | struct btrfs_key *node_keys, | ||
1900 | u64 *nodes, int lowest_level); | ||
1901 | int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, | 2000 | int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, |
1902 | struct btrfs_root *root, struct btrfs_path *path, | 2001 | struct btrfs_root *root, struct btrfs_path *path, |
1903 | struct btrfs_key *new_key); | 2002 | struct btrfs_key *new_key); |
@@ -1918,6 +2017,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
1918 | struct btrfs_root *root, | 2017 | struct btrfs_root *root, |
1919 | struct extent_buffer *buf, | 2018 | struct extent_buffer *buf, |
1920 | struct extent_buffer **cow_ret, u64 new_root_objectid); | 2019 | struct extent_buffer **cow_ret, u64 new_root_objectid); |
2020 | int btrfs_block_can_be_shared(struct btrfs_root *root, | ||
2021 | struct extent_buffer *buf); | ||
1921 | int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root | 2022 | int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root |
1922 | *root, struct btrfs_path *path, u32 data_size); | 2023 | *root, struct btrfs_path *path, u32 data_size); |
1923 | int btrfs_truncate_item(struct btrfs_trans_handle *trans, | 2024 | int btrfs_truncate_item(struct btrfs_trans_handle *trans, |
@@ -1944,9 +2045,6 @@ void btrfs_unlock_up_safe(struct btrfs_path *p, int level); | |||
1944 | 2045 | ||
1945 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2046 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1946 | struct btrfs_path *path, int slot, int nr); | 2047 | struct btrfs_path *path, int slot, int nr); |
1947 | int btrfs_del_leaf(struct btrfs_trans_handle *trans, | ||
1948 | struct btrfs_root *root, | ||
1949 | struct btrfs_path *path, u64 bytenr); | ||
1950 | static inline int btrfs_del_item(struct btrfs_trans_handle *trans, | 2048 | static inline int btrfs_del_item(struct btrfs_trans_handle *trans, |
1951 | struct btrfs_root *root, | 2049 | struct btrfs_root *root, |
1952 | struct btrfs_path *path) | 2050 | struct btrfs_path *path) |
@@ -2005,8 +2103,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | |||
2005 | btrfs_root_item *item, struct btrfs_key *key); | 2103 | btrfs_root_item *item, struct btrfs_key *key); |
2006 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | 2104 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, |
2007 | u64 *found_objectid); | 2105 | u64 *found_objectid); |
2008 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, | 2106 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
2009 | struct btrfs_root *latest_root); | 2107 | int btrfs_set_root_node(struct btrfs_root_item *item, |
2108 | struct extent_buffer *node); | ||
2010 | /* dir-item.c */ | 2109 | /* dir-item.c */ |
2011 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, | 2110 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, |
2012 | struct btrfs_root *root, const char *name, | 2111 | struct btrfs_root *root, const char *name, |
@@ -2139,7 +2238,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | |||
2139 | int btrfs_readpage(struct file *file, struct page *page); | 2238 | int btrfs_readpage(struct file *file, struct page *page); |
2140 | void btrfs_delete_inode(struct inode *inode); | 2239 | void btrfs_delete_inode(struct inode *inode); |
2141 | void btrfs_put_inode(struct inode *inode); | 2240 | void btrfs_put_inode(struct inode *inode); |
2142 | void btrfs_read_locked_inode(struct inode *inode); | ||
2143 | int btrfs_write_inode(struct inode *inode, int wait); | 2241 | int btrfs_write_inode(struct inode *inode, int wait); |
2144 | void btrfs_dirty_inode(struct inode *inode); | 2242 | void btrfs_dirty_inode(struct inode *inode); |
2145 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2243 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
@@ -2147,12 +2245,8 @@ void btrfs_destroy_inode(struct inode *inode); | |||
2147 | int btrfs_init_cachep(void); | 2245 | int btrfs_init_cachep(void); |
2148 | void btrfs_destroy_cachep(void); | 2246 | void btrfs_destroy_cachep(void); |
2149 | long btrfs_ioctl_trans_end(struct file *file); | 2247 | long btrfs_ioctl_trans_end(struct file *file); |
2150 | struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, | ||
2151 | struct btrfs_root *root, int wait); | ||
2152 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, | ||
2153 | struct btrfs_root *root); | ||
2154 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | 2248 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, |
2155 | struct btrfs_root *root, int *is_new); | 2249 | struct btrfs_root *root); |
2156 | int btrfs_commit_write(struct file *file, struct page *page, | 2250 | int btrfs_commit_write(struct file *file, struct page *page, |
2157 | unsigned from, unsigned to); | 2251 | unsigned from, unsigned to); |
2158 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | 2252 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, |
@@ -2168,6 +2262,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size); | |||
2168 | 2262 | ||
2169 | /* ioctl.c */ | 2263 | /* ioctl.c */ |
2170 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 2264 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
2265 | void btrfs_update_iflags(struct inode *inode); | ||
2266 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | ||
2171 | 2267 | ||
2172 | /* file.c */ | 2268 | /* file.c */ |
2173 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | 2269 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); |
@@ -2205,8 +2301,20 @@ int btrfs_parse_options(struct btrfs_root *root, char *options); | |||
2205 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2301 | int btrfs_sync_fs(struct super_block *sb, int wait); |
2206 | 2302 | ||
2207 | /* acl.c */ | 2303 | /* acl.c */ |
2304 | #ifdef CONFIG_FS_POSIX_ACL | ||
2208 | int btrfs_check_acl(struct inode *inode, int mask); | 2305 | int btrfs_check_acl(struct inode *inode, int mask); |
2306 | #else | ||
2307 | #define btrfs_check_acl NULL | ||
2308 | #endif | ||
2209 | int btrfs_init_acl(struct inode *inode, struct inode *dir); | 2309 | int btrfs_init_acl(struct inode *inode, struct inode *dir); |
2210 | int btrfs_acl_chmod(struct inode *inode); | 2310 | int btrfs_acl_chmod(struct inode *inode); |
2211 | 2311 | ||
2312 | /* relocation.c */ | ||
2313 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); | ||
2314 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
2315 | struct btrfs_root *root); | ||
2316 | int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | ||
2317 | struct btrfs_root *root); | ||
2318 | int btrfs_recover_relocation(struct btrfs_root *root); | ||
2319 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | ||
2212 | #endif | 2320 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index d6c01c096a40..84e6781413b1 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -29,27 +29,87 @@ | |||
29 | * add extents in the middle of btrfs_search_slot, and it allows | 29 | * add extents in the middle of btrfs_search_slot, and it allows |
30 | * us to buffer up frequently modified backrefs in an rb tree instead | 30 | * us to buffer up frequently modified backrefs in an rb tree instead |
31 | * of hammering updates on the extent allocation tree. | 31 | * of hammering updates on the extent allocation tree. |
32 | * | ||
33 | * Right now this code is only used for reference counted trees, but | ||
34 | * the long term goal is to get rid of the similar code for delayed | ||
35 | * extent tree modifications. | ||
36 | */ | 32 | */ |
37 | 33 | ||
38 | /* | 34 | /* |
39 | * entries in the rb tree are ordered by the byte number of the extent | 35 | * compare two delayed tree backrefs with same bytenr and type |
40 | * and by the byte number of the parent block. | 36 | */ |
37 | static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, | ||
38 | struct btrfs_delayed_tree_ref *ref1) | ||
39 | { | ||
40 | if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { | ||
41 | if (ref1->root < ref2->root) | ||
42 | return -1; | ||
43 | if (ref1->root > ref2->root) | ||
44 | return 1; | ||
45 | } else { | ||
46 | if (ref1->parent < ref2->parent) | ||
47 | return -1; | ||
48 | if (ref1->parent > ref2->parent) | ||
49 | return 1; | ||
50 | } | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * compare two delayed data backrefs with same bytenr and type | ||
41 | */ | 56 | */ |
42 | static int comp_entry(struct btrfs_delayed_ref_node *ref, | 57 | static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, |
43 | u64 bytenr, u64 parent) | 58 | struct btrfs_delayed_data_ref *ref1) |
44 | { | 59 | { |
45 | if (bytenr < ref->bytenr) | 60 | if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) { |
61 | if (ref1->root < ref2->root) | ||
62 | return -1; | ||
63 | if (ref1->root > ref2->root) | ||
64 | return 1; | ||
65 | if (ref1->objectid < ref2->objectid) | ||
66 | return -1; | ||
67 | if (ref1->objectid > ref2->objectid) | ||
68 | return 1; | ||
69 | if (ref1->offset < ref2->offset) | ||
70 | return -1; | ||
71 | if (ref1->offset > ref2->offset) | ||
72 | return 1; | ||
73 | } else { | ||
74 | if (ref1->parent < ref2->parent) | ||
75 | return -1; | ||
76 | if (ref1->parent > ref2->parent) | ||
77 | return 1; | ||
78 | } | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * entries in the rb tree are ordered by the byte number of the extent, | ||
84 | * type of the delayed backrefs and content of delayed backrefs. | ||
85 | */ | ||
86 | static int comp_entry(struct btrfs_delayed_ref_node *ref2, | ||
87 | struct btrfs_delayed_ref_node *ref1) | ||
88 | { | ||
89 | if (ref1->bytenr < ref2->bytenr) | ||
46 | return -1; | 90 | return -1; |
47 | if (bytenr > ref->bytenr) | 91 | if (ref1->bytenr > ref2->bytenr) |
48 | return 1; | 92 | return 1; |
49 | if (parent < ref->parent) | 93 | if (ref1->is_head && ref2->is_head) |
94 | return 0; | ||
95 | if (ref2->is_head) | ||
50 | return -1; | 96 | return -1; |
51 | if (parent > ref->parent) | 97 | if (ref1->is_head) |
52 | return 1; | 98 | return 1; |
99 | if (ref1->type < ref2->type) | ||
100 | return -1; | ||
101 | if (ref1->type > ref2->type) | ||
102 | return 1; | ||
103 | if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || | ||
104 | ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { | ||
105 | return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), | ||
106 | btrfs_delayed_node_to_tree_ref(ref1)); | ||
107 | } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY || | ||
108 | ref1->type == BTRFS_SHARED_DATA_REF_KEY) { | ||
109 | return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2), | ||
110 | btrfs_delayed_node_to_data_ref(ref1)); | ||
111 | } | ||
112 | BUG(); | ||
53 | return 0; | 113 | return 0; |
54 | } | 114 | } |
55 | 115 | ||
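Taken together, comp_tree_refs(), comp_data_refs() and comp_entry() define one total order: the primary key is the extent bytenr; within one bytenr every plain ref node sorts before the single head node; plain refs then order by type and finally by content (root or parent, plus objectid/offset for data refs). A flattened standalone model of that order, exercised with qsort; the content tie-break is elided:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ref_model {
        uint64_t bytenr;
        bool     is_head;
        int      type;      /* meaningful for non-head nodes only */
};

static int cmp_ref(const void *pa, const void *pb)
{
        const struct ref_model *a = pa, *b = pb;

        if (a->bytenr != b->bytenr)
                return a->bytenr < b->bytenr ? -1 : 1;
        if (a->is_head != b->is_head)
                return a->is_head ? 1 : -1;        /* head sorts last */
        if (a->is_head)
                return 0;
        /* content tie-break (root/parent/objectid/offset) elided */
        return (a->type > b->type) - (a->type < b->type);
}

int main(void)
{
        struct ref_model refs[] = {
                { 8192, false, 184 },   /* SHARED_DATA_REF */
                { 4096, true,  0   },   /* head for 4096 */
                { 4096, false, 176 },   /* TREE_BLOCK_REF */
        };
        qsort(refs, 3, sizeof(refs[0]), cmp_ref);
        for (int i = 0; i < 3; i++)
                printf("%llu %s\n", (unsigned long long)refs[i].bytenr,
                       refs[i].is_head ? "head" : "ref");
        return 0;
}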
@@ -59,20 +119,21 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref, | |||
59 | * inserted. | 119 | * inserted. |
60 | */ | 120 | */ |
61 | static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, | 121 | static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, |
62 | u64 bytenr, u64 parent, | ||
63 | struct rb_node *node) | 122 | struct rb_node *node) |
64 | { | 123 | { |
65 | struct rb_node **p = &root->rb_node; | 124 | struct rb_node **p = &root->rb_node; |
66 | struct rb_node *parent_node = NULL; | 125 | struct rb_node *parent_node = NULL; |
67 | struct btrfs_delayed_ref_node *entry; | 126 | struct btrfs_delayed_ref_node *entry; |
127 | struct btrfs_delayed_ref_node *ins; | ||
68 | int cmp; | 128 | int cmp; |
69 | 129 | ||
130 | ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
70 | while (*p) { | 131 | while (*p) { |
71 | parent_node = *p; | 132 | parent_node = *p; |
72 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, | 133 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, |
73 | rb_node); | 134 | rb_node); |
74 | 135 | ||
75 | cmp = comp_entry(entry, bytenr, parent); | 136 | cmp = comp_entry(entry, ins); |
76 | if (cmp < 0) | 137 | if (cmp < 0) |
77 | p = &(*p)->rb_left; | 138 | p = &(*p)->rb_left; |
78 | else if (cmp > 0) | 139 | else if (cmp > 0) |
@@ -81,18 +142,17 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, | |||
81 | return entry; | 142 | return entry; |
82 | } | 143 | } |
83 | 144 | ||
84 | entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
85 | rb_link_node(node, parent_node, p); | 145 | rb_link_node(node, parent_node, p); |
86 | rb_insert_color(node, root); | 146 | rb_insert_color(node, root); |
87 | return NULL; | 147 | return NULL; |
88 | } | 148 | } |
89 | 149 | ||
90 | /* | 150 | /* |
91 | * find an entry based on (bytenr,parent). This returns the delayed | 151 | * find a head entry based on bytenr. This returns the delayed ref |
92 | * ref if it was able to find one, or NULL if nothing was in that spot | 152 | * head if it was able to find one, or NULL if nothing was in that spot |
93 | */ | 153 | */ |
94 | static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, | 154 | static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, |
95 | u64 bytenr, u64 parent, | 155 | u64 bytenr, |
96 | struct btrfs_delayed_ref_node **last) | 156 | struct btrfs_delayed_ref_node **last) |
97 | { | 157 | { |
98 | struct rb_node *n = root->rb_node; | 158 | struct rb_node *n = root->rb_node; |
@@ -105,7 +165,15 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, | |||
105 | if (last) | 165 | if (last) |
106 | *last = entry; | 166 | *last = entry; |
107 | 167 | ||
108 | cmp = comp_entry(entry, bytenr, parent); | 168 | if (bytenr < entry->bytenr) |
169 | cmp = -1; | ||
170 | else if (bytenr > entry->bytenr) | ||
171 | cmp = 1; | ||
172 | else if (!btrfs_delayed_ref_is_head(entry)) | ||
173 | cmp = 1; | ||
174 | else | ||
175 | cmp = 0; | ||
176 | |||
109 | if (cmp < 0) | 177 | if (cmp < 0) |
110 | n = n->rb_left; | 178 | n = n->rb_left; |
111 | else if (cmp > 0) | 179 | else if (cmp > 0) |
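A sketch of the comparison find_ref_head() performs above: an exact bytenr match on a plain ref node still steers the walk right, because the head sorts after every plain ref for that bytenr, so the search can only terminate on a head:

#include <stdint.h>

static int cmp_target_to_node(uint64_t bytenr, uint64_t node_bytenr,
                              int node_is_head)
{
        if (bytenr < node_bytenr)
                return -1;
        if (bytenr > node_bytenr)
                return 1;
        return node_is_head ? 0 : 1;   /* equal bytenr: stop only at a head */
}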
@@ -154,7 +222,7 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | |||
154 | node = rb_first(&delayed_refs->root); | 222 | node = rb_first(&delayed_refs->root); |
155 | } else { | 223 | } else { |
156 | ref = NULL; | 224 | ref = NULL; |
157 | tree_search(&delayed_refs->root, start, (u64)-1, &ref); | 225 | find_ref_head(&delayed_refs->root, start, &ref); |
158 | if (ref) { | 226 | if (ref) { |
159 | struct btrfs_delayed_ref_node *tmp; | 227 | struct btrfs_delayed_ref_node *tmp; |
160 | 228 | ||
@@ -234,7 +302,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr) | |||
234 | delayed_refs = &trans->transaction->delayed_refs; | 302 | delayed_refs = &trans->transaction->delayed_refs; |
235 | spin_lock(&delayed_refs->lock); | 303 | spin_lock(&delayed_refs->lock); |
236 | 304 | ||
237 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | 305 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); |
238 | if (ref) { | 306 | if (ref) { |
239 | prev_node = rb_prev(&ref->rb_node); | 307 | prev_node = rb_prev(&ref->rb_node); |
240 | if (!prev_node) | 308 | if (!prev_node) |
@@ -250,25 +318,28 @@ out: | |||
250 | } | 318 | } |
251 | 319 | ||
252 | /* | 320 | /* |
253 | * helper function to lookup reference count | 321 | * helper function to look up the reference count and flags of an extent. |
254 | * | 322 | * |
255 | * the head node for delayed ref is used to store the sum of all the | 323 | * the head node for delayed ref is used to store the sum of all the |
256 | * reference count modifications queued up in the rbtree. This way you | 324 | * reference count modifications queued up in the rbtree. the head |
257 | * can check to see what the reference count would be if all of the | 325 | * node may also store the extent flags to set. This way you can check |
258 | * delayed refs are processed. | 326 | * to see what the reference count and extent flags would be if all of |
327 | the delayed refs were processed. | ||
259 | */ | 328 | */ |
260 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | 329 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, |
261 | struct btrfs_root *root, u64 bytenr, | 330 | struct btrfs_root *root, u64 bytenr, |
262 | u64 num_bytes, u32 *refs) | 331 | u64 num_bytes, u64 *refs, u64 *flags) |
263 | { | 332 | { |
264 | struct btrfs_delayed_ref_node *ref; | 333 | struct btrfs_delayed_ref_node *ref; |
265 | struct btrfs_delayed_ref_head *head; | 334 | struct btrfs_delayed_ref_head *head; |
266 | struct btrfs_delayed_ref_root *delayed_refs; | 335 | struct btrfs_delayed_ref_root *delayed_refs; |
267 | struct btrfs_path *path; | 336 | struct btrfs_path *path; |
268 | struct extent_buffer *leaf; | ||
269 | struct btrfs_extent_item *ei; | 337 | struct btrfs_extent_item *ei; |
338 | struct extent_buffer *leaf; | ||
270 | struct btrfs_key key; | 339 | struct btrfs_key key; |
271 | u32 num_refs; | 340 | u32 item_size; |
341 | u64 num_refs; | ||
342 | u64 extent_flags; | ||
272 | int ret; | 343 | int ret; |
273 | 344 | ||
274 | path = btrfs_alloc_path(); | 345 | path = btrfs_alloc_path(); |
@@ -287,37 +358,60 @@ again: | |||
287 | 358 | ||
288 | if (ret == 0) { | 359 | if (ret == 0) { |
289 | leaf = path->nodes[0]; | 360 | leaf = path->nodes[0]; |
290 | ei = btrfs_item_ptr(leaf, path->slots[0], | 361 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
291 | struct btrfs_extent_item); | 362 | if (item_size >= sizeof(*ei)) { |
292 | num_refs = btrfs_extent_refs(leaf, ei); | 363 | ei = btrfs_item_ptr(leaf, path->slots[0], |
364 | struct btrfs_extent_item); | ||
365 | num_refs = btrfs_extent_refs(leaf, ei); | ||
366 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
367 | } else { | ||
368 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
369 | struct btrfs_extent_item_v0 *ei0; | ||
370 | BUG_ON(item_size != sizeof(*ei0)); | ||
371 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
372 | struct btrfs_extent_item_v0); | ||
373 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
374 | /* FIXME: this isn't correct for data */ | ||
375 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
376 | #else | ||
377 | BUG(); | ||
378 | #endif | ||
379 | } | ||
380 | BUG_ON(num_refs == 0); | ||
293 | } else { | 381 | } else { |
294 | num_refs = 0; | 382 | num_refs = 0; |
383 | extent_flags = 0; | ||
295 | ret = 0; | 384 | ret = 0; |
296 | } | 385 | } |
297 | 386 | ||
298 | spin_lock(&delayed_refs->lock); | 387 | spin_lock(&delayed_refs->lock); |
299 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | 388 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); |
300 | if (ref) { | 389 | if (ref) { |
301 | head = btrfs_delayed_node_to_head(ref); | 390 | head = btrfs_delayed_node_to_head(ref); |
302 | if (mutex_trylock(&head->mutex)) { | 391 | if (!mutex_trylock(&head->mutex)) { |
303 | num_refs += ref->ref_mod; | 392 | atomic_inc(&ref->refs); |
304 | mutex_unlock(&head->mutex); | 393 | spin_unlock(&delayed_refs->lock); |
305 | *refs = num_refs; | ||
306 | goto out; | ||
307 | } | ||
308 | 394 | ||
309 | atomic_inc(&ref->refs); | 395 | btrfs_release_path(root->fs_info->extent_root, path); |
310 | spin_unlock(&delayed_refs->lock); | ||
311 | 396 | ||
312 | btrfs_release_path(root->fs_info->extent_root, path); | 397 | mutex_lock(&head->mutex); |
398 | mutex_unlock(&head->mutex); | ||
399 | btrfs_put_delayed_ref(ref); | ||
400 | goto again; | ||
401 | } | ||
402 | if (head->extent_op && head->extent_op->update_flags) | ||
403 | extent_flags |= head->extent_op->flags_to_set; | ||
404 | else | ||
405 | BUG_ON(num_refs == 0); | ||
313 | 406 | ||
314 | mutex_lock(&head->mutex); | 407 | num_refs += ref->ref_mod; |
315 | mutex_unlock(&head->mutex); | 408 | mutex_unlock(&head->mutex); |
316 | btrfs_put_delayed_ref(ref); | ||
317 | goto again; | ||
318 | } else { | ||
319 | *refs = num_refs; | ||
320 | } | 409 | } |
410 | WARN_ON(num_refs == 0); | ||
411 | if (refs) | ||
412 | *refs = num_refs; | ||
413 | if (flags) | ||
414 | *flags = extent_flags; | ||
321 | out: | 415 | out: |
322 | spin_unlock(&delayed_refs->lock); | 416 | spin_unlock(&delayed_refs->lock); |
323 | btrfs_free_path(path); | 417 | btrfs_free_path(path); |
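Condensed, btrfs_lookup_extent_info() answers with the on-disk reference count plus the pending modification sum kept in the delayed head (retrying after the head mutex if the head is busy), and folds in any flags a queued extent_op is about to set. A standalone model of the combination step, with the locking and retry elided:

#include <stdint.h>

struct head_model {
        int      ref_mod;        /* net +1/-1 sum of queued updates */
        int      update_flags;   /* a queued extent_op sets flags */
        uint64_t flags_to_set;
};

static void lookup_model(uint64_t disk_refs, uint64_t disk_flags,
                         const struct head_model *head,
                         uint64_t *refs, uint64_t *flags)
{
        uint64_t r = disk_refs;
        uint64_t f = disk_flags;

        if (head) {
                r += (int64_t)head->ref_mod;   /* may be negative */
                if (head->update_flags)
                        f |= head->flags_to_set;
        }
        if (refs)
                *refs = r;
        if (flags)
                *flags = f;
}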
@@ -338,16 +432,7 @@ update_existing_ref(struct btrfs_trans_handle *trans, | |||
338 | struct btrfs_delayed_ref_node *existing, | 432 | struct btrfs_delayed_ref_node *existing, |
339 | struct btrfs_delayed_ref_node *update) | 433 | struct btrfs_delayed_ref_node *update) |
340 | { | 434 | { |
341 | struct btrfs_delayed_ref *existing_ref; | 435 | if (update->action != existing->action) { |
342 | struct btrfs_delayed_ref *ref; | ||
343 | |||
344 | existing_ref = btrfs_delayed_node_to_ref(existing); | ||
345 | ref = btrfs_delayed_node_to_ref(update); | ||
346 | |||
347 | if (ref->pin) | ||
348 | existing_ref->pin = 1; | ||
349 | |||
350 | if (ref->action != existing_ref->action) { | ||
351 | /* | 436 | /* |
352 | * this is effectively undoing either an add or a | 437 | * this is effectively undoing either an add or a |
353 | * drop. We decrement the ref_mod, and if it goes | 438 | * drop. We decrement the ref_mod, and if it goes |
@@ -363,20 +448,13 @@ update_existing_ref(struct btrfs_trans_handle *trans, | |||
363 | delayed_refs->num_entries--; | 448 | delayed_refs->num_entries--; |
364 | if (trans->delayed_ref_updates) | 449 | if (trans->delayed_ref_updates) |
365 | trans->delayed_ref_updates--; | 450 | trans->delayed_ref_updates--; |
451 | } else { | ||
452 | WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || | ||
453 | existing->type == BTRFS_SHARED_BLOCK_REF_KEY); | ||
366 | } | 454 | } |
367 | } else { | 455 | } else { |
368 | if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { | 456 | WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || |
369 | /* if we're adding refs, make sure all the | 457 | existing->type == BTRFS_SHARED_BLOCK_REF_KEY); |
370 | * details match up. The extent could | ||
371 | * have been totally freed and reallocated | ||
372 | * by a different owner before the delayed | ||
373 | * ref entries were removed. | ||
374 | */ | ||
375 | existing_ref->owner_objectid = ref->owner_objectid; | ||
376 | existing_ref->generation = ref->generation; | ||
377 | existing_ref->root = ref->root; | ||
378 | existing->num_bytes = update->num_bytes; | ||
379 | } | ||
380 | /* | 458 | /* |
381 | * the action on the existing ref matches | 459 | * the action on the existing ref matches |
382 | * the action on the ref we're trying to add. | 460 | * the action on the ref we're trying to add. |
@@ -401,6 +479,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
401 | 479 | ||
402 | existing_ref = btrfs_delayed_node_to_head(existing); | 480 | existing_ref = btrfs_delayed_node_to_head(existing); |
403 | ref = btrfs_delayed_node_to_head(update); | 481 | ref = btrfs_delayed_node_to_head(update); |
482 | BUG_ON(existing_ref->is_data != ref->is_data); | ||
404 | 483 | ||
405 | if (ref->must_insert_reserved) { | 484 | if (ref->must_insert_reserved) { |
406 | /* if the extent was freed and then | 485 | /* if the extent was freed and then |
@@ -420,6 +499,24 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
420 | 499 | ||
421 | } | 500 | } |
422 | 501 | ||
502 | if (ref->extent_op) { | ||
503 | if (!existing_ref->extent_op) { | ||
504 | existing_ref->extent_op = ref->extent_op; | ||
505 | } else { | ||
506 | if (ref->extent_op->update_key) { | ||
507 | memcpy(&existing_ref->extent_op->key, | ||
508 | &ref->extent_op->key, | ||
509 | sizeof(ref->extent_op->key)); | ||
510 | existing_ref->extent_op->update_key = 1; | ||
511 | } | ||
512 | if (ref->extent_op->update_flags) { | ||
513 | existing_ref->extent_op->flags_to_set |= | ||
514 | ref->extent_op->flags_to_set; | ||
515 | existing_ref->extent_op->update_flags = 1; | ||
516 | } | ||
517 | kfree(ref->extent_op); | ||
518 | } | ||
519 | } | ||
423 | /* | 520 | /* |
424 | * update the reference mod on the head to reflect this new operation | 521 | * update the reference mod on the head to reflect this new operation |
425 | */ | 522 | */ |
@@ -427,19 +524,16 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
427 | } | 524 | } |
428 | 525 | ||
429 | /* | 526 | /* |
430 | * helper function to actually insert a delayed ref into the rbtree. | 527 | * helper function to actually insert a head node into the rbtree. |
431 | * this does all the dirty work in terms of maintaining the correct | 528 | * this does all the dirty work in terms of maintaining the correct |
432 | * overall modification count in the head node and properly dealing | 529 | * overall modification count. |
433 | * with updating existing nodes as new modifications are queued. | ||
434 | */ | 530 | */ |
435 | static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | 531 | static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, |
436 | struct btrfs_delayed_ref_node *ref, | 532 | struct btrfs_delayed_ref_node *ref, |
437 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | 533 | u64 bytenr, u64 num_bytes, |
438 | u64 ref_generation, u64 owner_objectid, int action, | 534 | int action, int is_data) |
439 | int pin) | ||
440 | { | 535 | { |
441 | struct btrfs_delayed_ref_node *existing; | 536 | struct btrfs_delayed_ref_node *existing; |
442 | struct btrfs_delayed_ref *full_ref; | ||
443 | struct btrfs_delayed_ref_head *head_ref = NULL; | 537 | struct btrfs_delayed_ref_head *head_ref = NULL; |
444 | struct btrfs_delayed_ref_root *delayed_refs; | 538 | struct btrfs_delayed_ref_root *delayed_refs; |
445 | int count_mod = 1; | 539 | int count_mod = 1; |
@@ -449,12 +543,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | |||
449 | * the head node stores the sum of all the mods, so dropping a ref | 543 | * the head node stores the sum of all the mods, so dropping a ref |
450 | * should drop the sum in the head node by one. | 544 | * should drop the sum in the head node by one. |
451 | */ | 545 | */ |
452 | if (parent == (u64)-1) { | 546 | if (action == BTRFS_UPDATE_DELAYED_HEAD) |
453 | if (action == BTRFS_DROP_DELAYED_REF) | 547 | count_mod = 0; |
454 | count_mod = -1; | 548 | else if (action == BTRFS_DROP_DELAYED_REF) |
455 | else if (action == BTRFS_UPDATE_DELAYED_HEAD) | 549 | count_mod = -1; |
456 | count_mod = 0; | ||
457 | } | ||
458 | 550 | ||
459 | /* | 551 | /* |
460 | * BTRFS_ADD_DELAYED_EXTENT means that we need to update | 552 | * BTRFS_ADD_DELAYED_EXTENT means that we need to update |
@@ -467,57 +559,148 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | |||
467 | * Once we record must_insert_reserved, switch the action to | 559 | * Once we record must_insert_reserved, switch the action to |
468 | * BTRFS_ADD_DELAYED_REF because other special casing is not required. | 560 | * BTRFS_ADD_DELAYED_REF because other special casing is not required. |
469 | */ | 561 | */ |
470 | if (action == BTRFS_ADD_DELAYED_EXTENT) { | 562 | if (action == BTRFS_ADD_DELAYED_EXTENT) |
471 | must_insert_reserved = 1; | 563 | must_insert_reserved = 1; |
472 | action = BTRFS_ADD_DELAYED_REF; | 564 | else |
473 | } else { | ||
474 | must_insert_reserved = 0; | 565 | must_insert_reserved = 0; |
475 | } | ||
476 | |||
477 | 566 | ||
478 | delayed_refs = &trans->transaction->delayed_refs; | 567 | delayed_refs = &trans->transaction->delayed_refs; |
479 | 568 | ||
480 | /* first set the basic ref node struct up */ | 569 | /* first set the basic ref node struct up */ |
481 | atomic_set(&ref->refs, 1); | 570 | atomic_set(&ref->refs, 1); |
482 | ref->bytenr = bytenr; | 571 | ref->bytenr = bytenr; |
483 | ref->parent = parent; | 572 | ref->num_bytes = num_bytes; |
484 | ref->ref_mod = count_mod; | 573 | ref->ref_mod = count_mod; |
574 | ref->type = 0; | ||
575 | ref->action = 0; | ||
576 | ref->is_head = 1; | ||
485 | ref->in_tree = 1; | 577 | ref->in_tree = 1; |
578 | |||
579 | head_ref = btrfs_delayed_node_to_head(ref); | ||
580 | head_ref->must_insert_reserved = must_insert_reserved; | ||
581 | head_ref->is_data = is_data; | ||
582 | |||
583 | INIT_LIST_HEAD(&head_ref->cluster); | ||
584 | mutex_init(&head_ref->mutex); | ||
585 | |||
586 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | ||
587 | |||
588 | if (existing) { | ||
589 | update_existing_head_ref(existing, ref); | ||
590 | /* | ||
591 | * we've updated the existing ref, free the newly | ||
592 | * allocated ref | ||
593 | */ | ||
594 | kfree(ref); | ||
595 | } else { | ||
596 | delayed_refs->num_heads++; | ||
597 | delayed_refs->num_heads_ready++; | ||
598 | delayed_refs->num_entries++; | ||
599 | trans->delayed_ref_updates++; | ||
600 | } | ||
601 | return 0; | ||
602 | } | ||
603 | |||
604 | /* | ||
605 | * helper to insert a delayed tree ref into the rbtree. | ||
606 | */ | ||
607 | static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, | ||
608 | struct btrfs_delayed_ref_node *ref, | ||
609 | u64 bytenr, u64 num_bytes, u64 parent, | ||
610 | u64 ref_root, int level, int action) | ||
611 | { | ||
612 | struct btrfs_delayed_ref_node *existing; | ||
613 | struct btrfs_delayed_tree_ref *full_ref; | ||
614 | struct btrfs_delayed_ref_root *delayed_refs; | ||
615 | |||
616 | if (action == BTRFS_ADD_DELAYED_EXTENT) | ||
617 | action = BTRFS_ADD_DELAYED_REF; | ||
618 | |||
619 | delayed_refs = &trans->transaction->delayed_refs; | ||
620 | |||
621 | /* first set the basic ref node struct up */ | ||
622 | atomic_set(&ref->refs, 1); | ||
623 | ref->bytenr = bytenr; | ||
486 | ref->num_bytes = num_bytes; | 624 | ref->num_bytes = num_bytes; |
625 | ref->ref_mod = 1; | ||
626 | ref->action = action; | ||
627 | ref->is_head = 0; | ||
628 | ref->in_tree = 1; | ||
487 | 629 | ||
488 | if (btrfs_delayed_ref_is_head(ref)) { | 630 | full_ref = btrfs_delayed_node_to_tree_ref(ref); |
489 | head_ref = btrfs_delayed_node_to_head(ref); | 631 | if (parent) { |
490 | head_ref->must_insert_reserved = must_insert_reserved; | 632 | full_ref->parent = parent; |
491 | INIT_LIST_HEAD(&head_ref->cluster); | 633 | ref->type = BTRFS_SHARED_BLOCK_REF_KEY; |
492 | mutex_init(&head_ref->mutex); | ||
493 | } else { | 634 | } else { |
494 | full_ref = btrfs_delayed_node_to_ref(ref); | ||
495 | full_ref->root = ref_root; | 635 | full_ref->root = ref_root; |
496 | full_ref->generation = ref_generation; | 636 | ref->type = BTRFS_TREE_BLOCK_REF_KEY; |
497 | full_ref->owner_objectid = owner_objectid; | ||
498 | full_ref->pin = pin; | ||
499 | full_ref->action = action; | ||
500 | } | 637 | } |
638 | full_ref->level = level; | ||
501 | 639 | ||
502 | existing = tree_insert(&delayed_refs->root, bytenr, | 640 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); |
503 | parent, &ref->rb_node); | ||
504 | 641 | ||
505 | if (existing) { | 642 | if (existing) { |
506 | if (btrfs_delayed_ref_is_head(ref)) | 643 | update_existing_ref(trans, delayed_refs, existing, ref); |
507 | update_existing_head_ref(existing, ref); | 644 | /* |
508 | else | 645 | * we've updated the existing ref, free the newly |
509 | update_existing_ref(trans, delayed_refs, existing, ref); | 646 | * allocated ref |
647 | */ | ||
648 | kfree(ref); | ||
649 | } else { | ||
650 | delayed_refs->num_entries++; | ||
651 | trans->delayed_ref_updates++; | ||
652 | } | ||
653 | return 0; | ||
654 | } | ||
655 | |||
656 | /* | ||
657 | * helper to insert a delayed data ref into the rbtree. | ||
658 | */ | ||
659 | static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, | ||
660 | struct btrfs_delayed_ref_node *ref, | ||
661 | u64 bytenr, u64 num_bytes, u64 parent, | ||
662 | u64 ref_root, u64 owner, u64 offset, | ||
663 | int action) | ||
664 | { | ||
665 | struct btrfs_delayed_ref_node *existing; | ||
666 | struct btrfs_delayed_data_ref *full_ref; | ||
667 | struct btrfs_delayed_ref_root *delayed_refs; | ||
668 | |||
669 | if (action == BTRFS_ADD_DELAYED_EXTENT) | ||
670 | action = BTRFS_ADD_DELAYED_REF; | ||
671 | |||
672 | delayed_refs = &trans->transaction->delayed_refs; | ||
673 | |||
674 | /* first set the basic ref node struct up */ | ||
675 | atomic_set(&ref->refs, 1); | ||
676 | ref->bytenr = bytenr; | ||
677 | ref->num_bytes = num_bytes; | ||
678 | ref->ref_mod = 1; | ||
679 | ref->action = action; | ||
680 | ref->is_head = 0; | ||
681 | ref->in_tree = 1; | ||
682 | |||
683 | full_ref = btrfs_delayed_node_to_data_ref(ref); | ||
684 | if (parent) { | ||
685 | full_ref->parent = parent; | ||
686 | ref->type = BTRFS_SHARED_DATA_REF_KEY; | ||
687 | } else { | ||
688 | full_ref->root = ref_root; | ||
689 | ref->type = BTRFS_EXTENT_DATA_REF_KEY; | ||
690 | } | ||
691 | full_ref->objectid = owner; | ||
692 | full_ref->offset = offset; | ||
510 | 693 | ||
694 | existing = tree_insert(&delayed_refs->root, &ref->rb_node); | ||
695 | |||
696 | if (existing) { | ||
697 | update_existing_ref(trans, delayed_refs, existing, ref); | ||
511 | /* | 698 | /* |
512 | * we've updated the existing ref, free the newly | 699 | * we've updated the existing ref, free the newly |
513 | * allocated ref | 700 | * allocated ref |
514 | */ | 701 | */ |
515 | kfree(ref); | 702 | kfree(ref); |
516 | } else { | 703 | } else { |
517 | if (btrfs_delayed_ref_is_head(ref)) { | ||
518 | delayed_refs->num_heads++; | ||
519 | delayed_refs->num_heads_ready++; | ||
520 | } | ||
521 | delayed_refs->num_entries++; | 704 | delayed_refs->num_entries++; |
522 | trans->delayed_ref_updates++; | 705 | trans->delayed_ref_updates++; |
523 | } | 706 | } |
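
One detail shared by the two new helpers above: the back-ref item type is now derived from whether a parent block is supplied, replacing the old parent == (u64)-1 sentinel logic. A compact model of that decision, with illustrative enum values standing in for the BTRFS_*_REF_KEY item types from ctree.h:

enum ref_kind { TREE_REF, DATA_REF };

/* stand-ins for the real key-type constants */
enum ref_type {
	TREE_BLOCK_REF, SHARED_BLOCK_REF,	/* tree blocks  */
	EXTENT_DATA_REF, SHARED_DATA_REF,	/* data extents */
};

/*
 * parent != 0 means the ref is "shared": we know which block points at
 * the extent but not the owning root, so a shared ref type is chosen.
 */
static enum ref_type pick_ref_type(enum ref_kind kind, unsigned long long parent)
{
	if (kind == TREE_REF)
		return parent ? SHARED_BLOCK_REF : TREE_BLOCK_REF;
	return parent ? SHARED_DATA_REF : EXTENT_DATA_REF;
}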
@@ -525,37 +708,78 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | |||
525 | } | 708 | } |
526 | 709 | ||
527 | /* | 710 | /* |
528 | * add a delayed ref to the tree. This does all of the accounting required | 711 | * add a delayed tree ref. This does all of the accounting required |
529 | * to make sure the delayed ref is eventually processed before this | 712 | * to make sure the delayed ref is eventually processed before this |
530 | * transaction commits. | 713 | * transaction commits. |
531 | */ | 714 | */ |
532 | int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | 715 | int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, |
533 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | 716 | u64 bytenr, u64 num_bytes, u64 parent, |
534 | u64 ref_generation, u64 owner_objectid, int action, | 717 | u64 ref_root, int level, int action, |
535 | int pin) | 718 | struct btrfs_delayed_extent_op *extent_op) |
536 | { | 719 | { |
537 | struct btrfs_delayed_ref *ref; | 720 | struct btrfs_delayed_tree_ref *ref; |
538 | struct btrfs_delayed_ref_head *head_ref; | 721 | struct btrfs_delayed_ref_head *head_ref; |
539 | struct btrfs_delayed_ref_root *delayed_refs; | 722 | struct btrfs_delayed_ref_root *delayed_refs; |
540 | int ret; | 723 | int ret; |
541 | 724 | ||
725 | BUG_ON(extent_op && extent_op->is_data); | ||
542 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | 726 | ref = kmalloc(sizeof(*ref), GFP_NOFS); |
543 | if (!ref) | 727 | if (!ref) |
544 | return -ENOMEM; | 728 | return -ENOMEM; |
545 | 729 | ||
730 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
731 | if (!head_ref) { | ||
732 | kfree(ref); | ||
733 | return -ENOMEM; | ||
734 | } | ||
735 | |||
736 | head_ref->extent_op = extent_op; | ||
737 | |||
738 | delayed_refs = &trans->transaction->delayed_refs; | ||
739 | spin_lock(&delayed_refs->lock); | ||
740 | |||
546 | /* | 741 | /* |
547 | * the parent = 0 case comes from cases where we don't actually | 742 | * insert both the head node and the new ref without dropping |
548 | * know the parent yet. It will get updated later via a add/drop | 743 | * the spin lock |
549 | * pair. | ||
550 | */ | 744 | */ |
551 | if (parent == 0) | 745 | ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes, |
552 | parent = bytenr; | 746 | action, 0); |
747 | BUG_ON(ret); | ||
748 | |||
749 | ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes, | ||
750 | parent, ref_root, level, action); | ||
751 | BUG_ON(ret); | ||
752 | spin_unlock(&delayed_refs->lock); | ||
753 | return 0; | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * add a delayed data ref. It's similar to btrfs_add_delayed_tree_ref. | ||
758 | */ | ||
759 | int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, | ||
760 | u64 bytenr, u64 num_bytes, | ||
761 | u64 parent, u64 ref_root, | ||
762 | u64 owner, u64 offset, int action, | ||
763 | struct btrfs_delayed_extent_op *extent_op) | ||
764 | { | ||
765 | struct btrfs_delayed_data_ref *ref; | ||
766 | struct btrfs_delayed_ref_head *head_ref; | ||
767 | struct btrfs_delayed_ref_root *delayed_refs; | ||
768 | int ret; | ||
769 | |||
770 | BUG_ON(extent_op && !extent_op->is_data); | ||
771 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
772 | if (!ref) | ||
773 | return -ENOMEM; | ||
553 | 774 | ||
554 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | 775 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); |
555 | if (!head_ref) { | 776 | if (!head_ref) { |
556 | kfree(ref); | 777 | kfree(ref); |
557 | return -ENOMEM; | 778 | return -ENOMEM; |
558 | } | 779 | } |
780 | |||
781 | head_ref->extent_op = extent_op; | ||
782 | |||
559 | delayed_refs = &trans->transaction->delayed_refs; | 783 | delayed_refs = &trans->transaction->delayed_refs; |
560 | spin_lock(&delayed_refs->lock); | 784 | spin_lock(&delayed_refs->lock); |
561 | 785 | ||
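
btrfs_add_delayed_tree_ref() above, and btrfs_add_delayed_data_ref() just below, share one discipline: both the ref and its head are allocated before the delayed_refs spinlock is taken, and both are inserted inside a single critical section, so no other thread ever sees a ref without its head. A self-contained userspace sketch of that pattern, with a pthread mutex standing in for the spinlock and a plain list for the rbtree:

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; int is_head; };

static pthread_mutex_t refs_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *refs_list;

static int add_ref_pair(void)
{
	/* allocate everything up front; no allocation under the lock */
	struct node *ref = calloc(1, sizeof(*ref));
	struct node *head = calloc(1, sizeof(*head));

	if (!ref || !head) {
		free(ref);
		free(head);
		return -1;	/* -ENOMEM in the kernel code */
	}
	head->is_head = 1;

	pthread_mutex_lock(&refs_lock);
	/* both insertions happen atomically with respect to readers */
	head->next = refs_list;
	ref->next = head;
	refs_list = ref;
	pthread_mutex_unlock(&refs_lock);
	return 0;
}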
@@ -563,14 +787,39 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | |||
563 | * insert both the head node and the new ref without dropping | 787 | * insert both the head node and the new ref without dropping |
564 | * the spin lock | 788 | * the spin lock |
565 | */ | 789 | */ |
566 | ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, | 790 | ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes, |
567 | (u64)-1, 0, 0, 0, action, pin); | 791 | action, 1); |
568 | BUG_ON(ret); | 792 | BUG_ON(ret); |
569 | 793 | ||
570 | ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, | 794 | ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes, |
571 | parent, ref_root, ref_generation, | 795 | parent, ref_root, owner, offset, action); |
572 | owner_objectid, action, pin); | 796 | BUG_ON(ret); |
797 | spin_unlock(&delayed_refs->lock); | ||
798 | return 0; | ||
799 | } | ||
800 | |||
801 | int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | ||
802 | u64 bytenr, u64 num_bytes, | ||
803 | struct btrfs_delayed_extent_op *extent_op) | ||
804 | { | ||
805 | struct btrfs_delayed_ref_head *head_ref; | ||
806 | struct btrfs_delayed_ref_root *delayed_refs; | ||
807 | int ret; | ||
808 | |||
809 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
810 | if (!head_ref) | ||
811 | return -ENOMEM; | ||
812 | |||
813 | head_ref->extent_op = extent_op; | ||
814 | |||
815 | delayed_refs = &trans->transaction->delayed_refs; | ||
816 | spin_lock(&delayed_refs->lock); | ||
817 | |||
818 | ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, | ||
819 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, | ||
820 | extent_op->is_data); | ||
573 | BUG_ON(ret); | 821 | BUG_ON(ret); |
822 | |||
574 | spin_unlock(&delayed_refs->lock); | 823 | spin_unlock(&delayed_refs->lock); |
575 | return 0; | 824 | return 0; |
576 | } | 825 | } |
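
For orientation, here is a hedged sketch of how a caller might queue a flags-only update through the new btrfs_add_delayed_extent_op() entry point. It is built only from the struct fields added in delayed-ref.h further down; the wrapper name and the flag argument are hypothetical, not taken from this patch.

/* hypothetical caller: delay setting a flag on an existing extent item */
static int queue_flag_update(struct btrfs_trans_handle *trans,
			     u64 bytenr, u64 num_bytes, u64 flag)
{
	struct btrfs_delayed_extent_op *op;

	op = kmalloc(sizeof(*op), GFP_NOFS);
	if (!op)
		return -ENOMEM;

	op->flags_to_set = flag;
	op->update_flags = 1;	/* OR "flag" into the extent item's flags */
	op->update_key = 0;	/* leave the stored key alone */
	op->is_data = 0;	/* this head refers to a tree block */

	/* ownership of "op" passes to the delayed-ref head */
	return btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, op);
}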
@@ -587,7 +836,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
587 | struct btrfs_delayed_ref_root *delayed_refs; | 836 | struct btrfs_delayed_ref_root *delayed_refs; |
588 | 837 | ||
589 | delayed_refs = &trans->transaction->delayed_refs; | 838 | delayed_refs = &trans->transaction->delayed_refs; |
590 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | 839 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); |
591 | if (ref) | 840 | if (ref) |
592 | return btrfs_delayed_node_to_head(ref); | 841 | return btrfs_delayed_node_to_head(ref); |
593 | return NULL; | 842 | return NULL; |
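
With parent gone from the node, find_ref_head() locates a head purely by bytenr plus the new is_head bit rather than the old (u64)-1 parent sentinel. Its body is not part of this hunk, so the following is only a speculative model of the lookup, written over a sorted linked list instead of the rbtree:

struct dnode { unsigned long long bytenr; int is_head; struct dnode *next; };

/* return the head node for "bytenr", or NULL if none is queued */
static struct dnode *find_head(struct dnode *list, unsigned long long bytenr)
{
	for (; list; list = list->next) {
		if (list->bytenr > bytenr)
			break;		/* list is sorted: no match past here */
		if (list->bytenr == bytenr && list->is_head)
			return list;	/* is_head disambiguates from plain refs */
	}
	return NULL;
}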
@@ -603,6 +852,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
603 | * | 852 | * |
604 | * It is the same as doing a ref add and delete in two separate calls. | 853 | * It is the same as doing a ref add and delete in two separate calls. |
605 | */ | 854 | */ |
855 | #if 0 | ||
606 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | 856 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, |
607 | u64 bytenr, u64 num_bytes, u64 orig_parent, | 857 | u64 bytenr, u64 num_bytes, u64 orig_parent, |
608 | u64 parent, u64 orig_ref_root, u64 ref_root, | 858 | u64 parent, u64 orig_ref_root, u64 ref_root, |
@@ -666,3 +916,4 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | |||
666 | spin_unlock(&delayed_refs->lock); | 916 | spin_unlock(&delayed_refs->lock); |
667 | return 0; | 917 | return 0; |
668 | } | 918 | } |
919 | #endif | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 3bec2ff0b15c..f6fc67ddad36 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -30,9 +30,6 @@ struct btrfs_delayed_ref_node { | |||
30 | /* the starting bytenr of the extent */ | 30 | /* the starting bytenr of the extent */ |
31 | u64 bytenr; | 31 | u64 bytenr; |
32 | 32 | ||
33 | /* the parent our backref will point to */ | ||
34 | u64 parent; | ||
35 | |||
36 | /* the size of the extent */ | 33 | /* the size of the extent */ |
37 | u64 num_bytes; | 34 | u64 num_bytes; |
38 | 35 | ||
@@ -50,10 +47,21 @@ struct btrfs_delayed_ref_node { | |||
50 | */ | 47 | */ |
51 | int ref_mod; | 48 | int ref_mod; |
52 | 49 | ||
50 | unsigned int action:8; | ||
51 | unsigned int type:8; | ||
53 | /* is this node still in the rbtree? */ | 52 | /* is this node still in the rbtree? */ |
53 | unsigned int is_head:1; | ||
54 | unsigned int in_tree:1; | 54 | unsigned int in_tree:1; |
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct btrfs_delayed_extent_op { | ||
58 | struct btrfs_disk_key key; | ||
59 | u64 flags_to_set; | ||
60 | unsigned int update_key:1; | ||
61 | unsigned int update_flags:1; | ||
62 | unsigned int is_data:1; | ||
63 | }; | ||
64 | |||
57 | /* | 65 | /* |
58 | * the head refs are used to hold a lock on a given extent, which allows us | 66 | * the head refs are used to hold a lock on a given extent, which allows us |
59 | * to make sure that only one process is running the delayed refs | 67 | * to make sure that only one process is running the delayed refs |
@@ -71,6 +79,7 @@ struct btrfs_delayed_ref_head { | |||
71 | 79 | ||
72 | struct list_head cluster; | 80 | struct list_head cluster; |
73 | 81 | ||
82 | struct btrfs_delayed_extent_op *extent_op; | ||
74 | /* | 83 | /* |
75 | * when a new extent is allocated, it is just reserved in memory | 84 | * when a new extent is allocated, it is just reserved in memory |
76 | * The actual extent isn't inserted into the extent allocation tree | 85 | * The actual extent isn't inserted into the extent allocation tree |
@@ -84,27 +93,26 @@ struct btrfs_delayed_ref_head { | |||
84 | * the free has happened. | 93 | * the free has happened. |
85 | */ | 94 | */ |
86 | unsigned int must_insert_reserved:1; | 95 | unsigned int must_insert_reserved:1; |
96 | unsigned int is_data:1; | ||
87 | }; | 97 | }; |
88 | 98 | ||
89 | struct btrfs_delayed_ref { | 99 | struct btrfs_delayed_tree_ref { |
90 | struct btrfs_delayed_ref_node node; | 100 | struct btrfs_delayed_ref_node node; |
101 | union { | ||
102 | u64 root; | ||
103 | u64 parent; | ||
104 | }; | ||
105 | int level; | ||
106 | }; | ||
91 | 107 | ||
92 | /* the root objectid our ref will point to */ | 108 | struct btrfs_delayed_data_ref { |
93 | u64 root; | 109 | struct btrfs_delayed_ref_node node; |
94 | 110 | union { | |
95 | /* the generation for the backref */ | 111 | u64 root; |
96 | u64 generation; | 112 | u64 parent; |
97 | 113 | }; | |
98 | /* owner_objectid of the backref */ | 114 | u64 objectid; |
99 | u64 owner_objectid; | 115 | u64 offset; |
100 | |||
101 | /* operation done by this entry in the rbtree */ | ||
102 | u8 action; | ||
103 | |||
104 | /* if pin == 1, when the extent is freed it will be pinned until | ||
105 | * transaction commit | ||
106 | */ | ||
107 | unsigned int pin:1; | ||
108 | }; | 116 | }; |
109 | 117 | ||
110 | struct btrfs_delayed_ref_root { | 118 | struct btrfs_delayed_ref_root { |
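
Both new ref structs overlay root and parent in an anonymous union; which member is live is encoded in node.type, as assigned by add_delayed_tree_ref() and add_delayed_data_ref() earlier in this patch. A small illustration of reading such a union through its discriminant (illustrative constants, not the real key-type values):

enum { TREE_BLOCK_REF = 1, SHARED_BLOCK_REF = 2 };	/* stand-in types */

struct tref {
	int type;				/* lives in the embedded ref node */
	union {
		unsigned long long root;	/* valid for TREE_BLOCK_REF   */
		unsigned long long parent;	/* valid for SHARED_BLOCK_REF */
	};
};

static unsigned long long ref_target(const struct tref *r)
{
	/* the discriminant decides which union member is meaningful */
	return r->type == SHARED_BLOCK_REF ? r->parent : r->root;
}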
@@ -143,17 +151,25 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | |||
143 | } | 151 | } |
144 | } | 152 | } |
145 | 153 | ||
146 | int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | 154 | int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, |
147 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | 155 | u64 bytenr, u64 num_bytes, u64 parent, |
148 | u64 ref_generation, u64 owner_objectid, int action, | 156 | u64 ref_root, int level, int action, |
149 | int pin); | 157 | struct btrfs_delayed_extent_op *extent_op); |
158 | int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, | ||
159 | u64 bytenr, u64 num_bytes, | ||
160 | u64 parent, u64 ref_root, | ||
161 | u64 owner, u64 offset, int action, | ||
162 | struct btrfs_delayed_extent_op *extent_op); | ||
163 | int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | ||
164 | u64 bytenr, u64 num_bytes, | ||
165 | struct btrfs_delayed_extent_op *extent_op); | ||
150 | 166 | ||
151 | struct btrfs_delayed_ref_head * | 167 | struct btrfs_delayed_ref_head * |
152 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
153 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); |
154 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | 170 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, |
155 | struct btrfs_root *root, u64 bytenr, | 171 | struct btrfs_root *root, u64 bytenr, |
156 | u64 num_bytes, u32 *refs); | 172 | u64 num_bytes, u64 *refs, u64 *flags); |
157 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | 173 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, |
158 | u64 bytenr, u64 num_bytes, u64 orig_parent, | 174 | u64 bytenr, u64 num_bytes, u64 orig_parent, |
159 | u64 parent, u64 orig_ref_root, u64 ref_root, | 175 | u64 parent, u64 orig_ref_root, u64 ref_root, |
@@ -169,18 +185,24 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | |||
169 | */ | 185 | */ |
170 | static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) | 186 | static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) |
171 | { | 187 | { |
172 | return node->parent == (u64)-1; | 188 | return node->is_head; |
173 | } | 189 | } |
174 | 190 | ||
175 | /* | 191 | /* |
176 | * helper functions to cast a node into its container | 192 | * helper functions to cast a node into its container |
177 | */ | 193 | */ |
178 | static inline struct btrfs_delayed_ref * | 194 | static inline struct btrfs_delayed_tree_ref * |
179 | btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) | 195 | btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node) |
180 | { | 196 | { |
181 | WARN_ON(btrfs_delayed_ref_is_head(node)); | 197 | WARN_ON(btrfs_delayed_ref_is_head(node)); |
182 | return container_of(node, struct btrfs_delayed_ref, node); | 198 | return container_of(node, struct btrfs_delayed_tree_ref, node); |
199 | } | ||
183 | 200 | ||
201 | static inline struct btrfs_delayed_data_ref * | ||
202 | btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node) | ||
203 | { | ||
204 | WARN_ON(btrfs_delayed_ref_is_head(node)); | ||
205 | return container_of(node, struct btrfs_delayed_data_ref, node); | ||
184 | } | 206 | } |
185 | 207 | ||
186 | static inline struct btrfs_delayed_ref_head * | 208 | static inline struct btrfs_delayed_ref_head * |
@@ -188,6 +210,5 @@ btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) | |||
188 | { | 210 | { |
189 | WARN_ON(!btrfs_delayed_ref_is_head(node)); | 211 | WARN_ON(!btrfs_delayed_ref_is_head(node)); |
190 | return container_of(node, struct btrfs_delayed_ref_head, node); | 212 | return container_of(node, struct btrfs_delayed_ref_head, node); |
191 | |||
192 | } | 213 | } |
193 | #endif | 214 | #endif |
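
The node_to_tree_ref and node_to_data_ref helpers above lean on the embedded-node idiom: btrfs_delayed_ref_node is the first member of each concrete ref, so container_of() recovers the outer struct from a pointer into the rbtree. A self-contained demonstration of the same idiom, with a local container_of that mirrors the kernel macro:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ref_node { int ref_mod; };			/* the generic part */
struct tree_ref { struct ref_node node; int level; };	/* a concrete ref   */

int main(void)
{
	struct tree_ref tr = { .node = { .ref_mod = 1 }, .level = 2 };
	struct ref_node *n = &tr.node;	/* generic view, as stored in the tree */

	/* downcast back, like btrfs_delayed_node_to_tree_ref() */
	struct tree_ref *back = container_of(n, struct tree_ref, node);
	printf("level=%d\n", back->level);	/* prints level=2 */
	return 0;
}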
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4b0ea0b80c23..0d50d49d990a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -26,8 +26,8 @@ | |||
26 | #include <linux/workqueue.h> | 26 | #include <linux/workqueue.h> |
27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
28 | #include <linux/freezer.h> | 28 | #include <linux/freezer.h> |
29 | #include <linux/crc32c.h> | ||
29 | #include "compat.h" | 30 | #include "compat.h" |
30 | #include "crc32c.h" | ||
31 | #include "ctree.h" | 31 | #include "ctree.h" |
32 | #include "disk-io.h" | 32 | #include "disk-io.h" |
33 | #include "transaction.h" | 33 | #include "transaction.h" |
@@ -36,7 +36,6 @@ | |||
36 | #include "print-tree.h" | 36 | #include "print-tree.h" |
37 | #include "async-thread.h" | 37 | #include "async-thread.h" |
38 | #include "locking.h" | 38 | #include "locking.h" |
39 | #include "ref-cache.h" | ||
40 | #include "tree-log.h" | 39 | #include "tree-log.h" |
41 | #include "free-space-cache.h" | 40 | #include "free-space-cache.h" |
42 | 41 | ||
@@ -172,7 +171,7 @@ out: | |||
172 | 171 | ||
173 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | 172 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) |
174 | { | 173 | { |
175 | return btrfs_crc32c(seed, data, len); | 174 | return crc32c(seed, data, len); |
176 | } | 175 | } |
177 | 176 | ||
178 | void btrfs_csum_final(u32 crc, char *result) | 177 | void btrfs_csum_final(u32 crc, char *result) |
@@ -884,7 +883,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
884 | { | 883 | { |
885 | root->node = NULL; | 884 | root->node = NULL; |
886 | root->commit_root = NULL; | 885 | root->commit_root = NULL; |
887 | root->ref_tree = NULL; | ||
888 | root->sectorsize = sectorsize; | 886 | root->sectorsize = sectorsize; |
889 | root->nodesize = nodesize; | 887 | root->nodesize = nodesize; |
890 | root->leafsize = leafsize; | 888 | root->leafsize = leafsize; |
@@ -899,12 +897,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
899 | root->last_inode_alloc = 0; | 897 | root->last_inode_alloc = 0; |
900 | root->name = NULL; | 898 | root->name = NULL; |
901 | root->in_sysfs = 0; | 899 | root->in_sysfs = 0; |
900 | root->inode_tree.rb_node = NULL; | ||
902 | 901 | ||
903 | INIT_LIST_HEAD(&root->dirty_list); | 902 | INIT_LIST_HEAD(&root->dirty_list); |
904 | INIT_LIST_HEAD(&root->orphan_list); | 903 | INIT_LIST_HEAD(&root->orphan_list); |
905 | INIT_LIST_HEAD(&root->dead_list); | 904 | INIT_LIST_HEAD(&root->root_list); |
906 | spin_lock_init(&root->node_lock); | 905 | spin_lock_init(&root->node_lock); |
907 | spin_lock_init(&root->list_lock); | 906 | spin_lock_init(&root->list_lock); |
907 | spin_lock_init(&root->inode_lock); | ||
908 | mutex_init(&root->objectid_mutex); | 908 | mutex_init(&root->objectid_mutex); |
909 | mutex_init(&root->log_mutex); | 909 | mutex_init(&root->log_mutex); |
910 | init_waitqueue_head(&root->log_writer_wait); | 910 | init_waitqueue_head(&root->log_writer_wait); |
@@ -918,9 +918,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
918 | extent_io_tree_init(&root->dirty_log_pages, | 918 | extent_io_tree_init(&root->dirty_log_pages, |
919 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 919 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
920 | 920 | ||
921 | btrfs_leaf_ref_tree_init(&root->ref_tree_struct); | ||
922 | root->ref_tree = &root->ref_tree_struct; | ||
923 | |||
924 | memset(&root->root_key, 0, sizeof(root->root_key)); | 921 | memset(&root->root_key, 0, sizeof(root->root_key)); |
925 | memset(&root->root_item, 0, sizeof(root->root_item)); | 922 | memset(&root->root_item, 0, sizeof(root->root_item)); |
926 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); | 923 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); |
@@ -959,6 +956,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
959 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 956 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
960 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 957 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
961 | blocksize, generation); | 958 | blocksize, generation); |
959 | root->commit_root = btrfs_root_node(root); | ||
962 | BUG_ON(!root->node); | 960 | BUG_ON(!root->node); |
963 | return 0; | 961 | return 0; |
964 | } | 962 | } |
@@ -1025,20 +1023,19 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1025 | */ | 1023 | */ |
1026 | root->ref_cows = 0; | 1024 | root->ref_cows = 0; |
1027 | 1025 | ||
1028 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 1026 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
1029 | 0, BTRFS_TREE_LOG_OBJECTID, | 1027 | BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); |
1030 | trans->transid, 0, 0, 0); | ||
1031 | if (IS_ERR(leaf)) { | 1028 | if (IS_ERR(leaf)) { |
1032 | kfree(root); | 1029 | kfree(root); |
1033 | return ERR_CAST(leaf); | 1030 | return ERR_CAST(leaf); |
1034 | } | 1031 | } |
1035 | 1032 | ||
1033 | memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); | ||
1034 | btrfs_set_header_bytenr(leaf, leaf->start); | ||
1035 | btrfs_set_header_generation(leaf, trans->transid); | ||
1036 | btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); | ||
1037 | btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); | ||
1036 | root->node = leaf; | 1038 | root->node = leaf; |
1037 | btrfs_set_header_nritems(root->node, 0); | ||
1038 | btrfs_set_header_level(root->node, 0); | ||
1039 | btrfs_set_header_bytenr(root->node, root->node->start); | ||
1040 | btrfs_set_header_generation(root->node, trans->transid); | ||
1041 | btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); | ||
1042 | 1039 | ||
1043 | write_extent_buffer(root->node, root->fs_info->fsid, | 1040 | write_extent_buffer(root->node, root->fs_info->fsid, |
1044 | (unsigned long)btrfs_header_fsid(root->node), | 1041 | (unsigned long)btrfs_header_fsid(root->node), |
@@ -1081,8 +1078,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
1081 | inode_item->nbytes = cpu_to_le64(root->leafsize); | 1078 | inode_item->nbytes = cpu_to_le64(root->leafsize); |
1082 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); | 1079 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); |
1083 | 1080 | ||
1084 | btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); | 1081 | btrfs_set_root_node(&log_root->root_item, log_root->node); |
1085 | btrfs_set_root_generation(&log_root->root_item, trans->transid); | ||
1086 | 1082 | ||
1087 | WARN_ON(root->log_root); | 1083 | WARN_ON(root->log_root); |
1088 | root->log_root = log_root; | 1084 | root->log_root = log_root; |
@@ -1144,6 +1140,7 @@ out: | |||
1144 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1140 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
1145 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1141 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1146 | blocksize, generation); | 1142 | blocksize, generation); |
1143 | root->commit_root = btrfs_root_node(root); | ||
1147 | BUG_ON(!root->node); | 1144 | BUG_ON(!root->node); |
1148 | insert: | 1145 | insert: |
1149 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1146 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { |
@@ -1210,7 +1207,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
1210 | } | 1207 | } |
1211 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 1208 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
1212 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 1209 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
1213 | root->root_key.objectid, root); | 1210 | root->root_key.objectid); |
1214 | BUG_ON(ret); | 1211 | BUG_ON(ret); |
1215 | btrfs_orphan_cleanup(root); | 1212 | btrfs_orphan_cleanup(root); |
1216 | } | 1213 | } |
@@ -1569,8 +1566,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1569 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1566 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1570 | atomic_set(&fs_info->async_submit_draining, 0); | 1567 | atomic_set(&fs_info->async_submit_draining, 0); |
1571 | atomic_set(&fs_info->nr_async_bios, 0); | 1568 | atomic_set(&fs_info->nr_async_bios, 0); |
1572 | atomic_set(&fs_info->throttles, 0); | ||
1573 | atomic_set(&fs_info->throttle_gen, 0); | ||
1574 | fs_info->sb = sb; | 1569 | fs_info->sb = sb; |
1575 | fs_info->max_extent = (u64)-1; | 1570 | fs_info->max_extent = (u64)-1; |
1576 | fs_info->max_inline = 8192 * 1024; | 1571 | fs_info->max_inline = 8192 * 1024; |
@@ -1598,6 +1593,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1598 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; | 1593 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; |
1599 | fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; | 1594 | fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; |
1600 | 1595 | ||
1596 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); | ||
1601 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | 1597 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, |
1602 | fs_info->btree_inode->i_mapping, | 1598 | fs_info->btree_inode->i_mapping, |
1603 | GFP_NOFS); | 1599 | GFP_NOFS); |
@@ -1613,10 +1609,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1613 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1609 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1614 | fs_info->do_barriers = 1; | 1610 | fs_info->do_barriers = 1; |
1615 | 1611 | ||
1616 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | ||
1617 | btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); | ||
1618 | btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); | ||
1619 | |||
1620 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | 1612 | BTRFS_I(fs_info->btree_inode)->root = tree_root; |
1621 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | 1613 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, |
1622 | sizeof(struct btrfs_key)); | 1614 | sizeof(struct btrfs_key)); |
@@ -1674,6 +1666,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1674 | goto fail_iput; | 1666 | goto fail_iput; |
1675 | } | 1667 | } |
1676 | 1668 | ||
1669 | features = btrfs_super_incompat_flags(disk_super); | ||
1670 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | ||
1671 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | ||
1672 | btrfs_set_super_incompat_flags(disk_super, features); | ||
1673 | } | ||
1674 | |||
1677 | features = btrfs_super_compat_ro_flags(disk_super) & | 1675 | features = btrfs_super_compat_ro_flags(disk_super) & |
1678 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1676 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
1679 | if (!(sb->s_flags & MS_RDONLY) && features) { | 1677 | if (!(sb->s_flags & MS_RDONLY) && features) { |
@@ -1771,7 +1769,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1771 | if (ret) { | 1769 | if (ret) { |
1772 | printk(KERN_WARNING "btrfs: failed to read the system " | 1770 | printk(KERN_WARNING "btrfs: failed to read the system " |
1773 | "array on %s\n", sb->s_id); | 1771 | "array on %s\n", sb->s_id); |
1774 | goto fail_sys_array; | 1772 | goto fail_sb_buffer; |
1775 | } | 1773 | } |
1776 | 1774 | ||
1777 | blocksize = btrfs_level_size(tree_root, | 1775 | blocksize = btrfs_level_size(tree_root, |
@@ -1785,6 +1783,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1785 | btrfs_super_chunk_root(disk_super), | 1783 | btrfs_super_chunk_root(disk_super), |
1786 | blocksize, generation); | 1784 | blocksize, generation); |
1787 | BUG_ON(!chunk_root->node); | 1785 | BUG_ON(!chunk_root->node); |
1786 | btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); | ||
1787 | chunk_root->commit_root = btrfs_root_node(chunk_root); | ||
1788 | 1788 | ||
1789 | read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, | 1789 | read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, |
1790 | (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), | 1790 | (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), |
@@ -1810,7 +1810,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1810 | blocksize, generation); | 1810 | blocksize, generation); |
1811 | if (!tree_root->node) | 1811 | if (!tree_root->node) |
1812 | goto fail_chunk_root; | 1812 | goto fail_chunk_root; |
1813 | 1813 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); | |
1814 | tree_root->commit_root = btrfs_root_node(tree_root); | ||
1814 | 1815 | ||
1815 | ret = find_and_setup_root(tree_root, fs_info, | 1816 | ret = find_and_setup_root(tree_root, fs_info, |
1816 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); | 1817 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); |
@@ -1820,14 +1821,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1820 | 1821 | ||
1821 | ret = find_and_setup_root(tree_root, fs_info, | 1822 | ret = find_and_setup_root(tree_root, fs_info, |
1822 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 1823 | BTRFS_DEV_TREE_OBJECTID, dev_root); |
1823 | dev_root->track_dirty = 1; | ||
1824 | if (ret) | 1824 | if (ret) |
1825 | goto fail_extent_root; | 1825 | goto fail_extent_root; |
1826 | dev_root->track_dirty = 1; | ||
1826 | 1827 | ||
1827 | ret = find_and_setup_root(tree_root, fs_info, | 1828 | ret = find_and_setup_root(tree_root, fs_info, |
1828 | BTRFS_CSUM_TREE_OBJECTID, csum_root); | 1829 | BTRFS_CSUM_TREE_OBJECTID, csum_root); |
1829 | if (ret) | 1830 | if (ret) |
1830 | goto fail_extent_root; | 1831 | goto fail_dev_root; |
1831 | 1832 | ||
1832 | csum_root->track_dirty = 1; | 1833 | csum_root->track_dirty = 1; |
1833 | 1834 | ||
@@ -1849,6 +1850,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1849 | if (IS_ERR(fs_info->transaction_kthread)) | 1850 | if (IS_ERR(fs_info->transaction_kthread)) |
1850 | goto fail_cleaner; | 1851 | goto fail_cleaner; |
1851 | 1852 | ||
1853 | if (!btrfs_test_opt(tree_root, SSD) && | ||
1854 | !btrfs_test_opt(tree_root, NOSSD) && | ||
1855 | !fs_info->fs_devices->rotating) { | ||
1856 | printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD " | ||
1857 | "mode\n"); | ||
1858 | btrfs_set_opt(fs_info->mount_opt, SSD); | ||
1859 | } | ||
1860 | |||
1852 | if (btrfs_super_log_root(disk_super) != 0) { | 1861 | if (btrfs_super_log_root(disk_super) != 0) { |
1853 | u64 bytenr = btrfs_super_log_root(disk_super); | 1862 | u64 bytenr = btrfs_super_log_root(disk_super); |
1854 | 1863 | ||
@@ -1881,7 +1890,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1881 | } | 1890 | } |
1882 | 1891 | ||
1883 | if (!(sb->s_flags & MS_RDONLY)) { | 1892 | if (!(sb->s_flags & MS_RDONLY)) { |
1884 | ret = btrfs_cleanup_reloc_trees(tree_root); | 1893 | ret = btrfs_recover_relocation(tree_root); |
1885 | BUG_ON(ret); | 1894 | BUG_ON(ret); |
1886 | } | 1895 | } |
1887 | 1896 | ||
@@ -1892,6 +1901,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1892 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 1901 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
1893 | if (!fs_info->fs_root) | 1902 | if (!fs_info->fs_root) |
1894 | goto fail_trans_kthread; | 1903 | goto fail_trans_kthread; |
1904 | |||
1895 | return tree_root; | 1905 | return tree_root; |
1896 | 1906 | ||
1897 | fail_trans_kthread: | 1907 | fail_trans_kthread: |
@@ -1908,14 +1918,19 @@ fail_cleaner: | |||
1908 | 1918 | ||
1909 | fail_csum_root: | 1919 | fail_csum_root: |
1910 | free_extent_buffer(csum_root->node); | 1920 | free_extent_buffer(csum_root->node); |
1921 | free_extent_buffer(csum_root->commit_root); | ||
1922 | fail_dev_root: | ||
1923 | free_extent_buffer(dev_root->node); | ||
1924 | free_extent_buffer(dev_root->commit_root); | ||
1911 | fail_extent_root: | 1925 | fail_extent_root: |
1912 | free_extent_buffer(extent_root->node); | 1926 | free_extent_buffer(extent_root->node); |
1927 | free_extent_buffer(extent_root->commit_root); | ||
1913 | fail_tree_root: | 1928 | fail_tree_root: |
1914 | free_extent_buffer(tree_root->node); | 1929 | free_extent_buffer(tree_root->node); |
1930 | free_extent_buffer(tree_root->commit_root); | ||
1915 | fail_chunk_root: | 1931 | fail_chunk_root: |
1916 | free_extent_buffer(chunk_root->node); | 1932 | free_extent_buffer(chunk_root->node); |
1917 | fail_sys_array: | 1933 | free_extent_buffer(chunk_root->commit_root); |
1918 | free_extent_buffer(dev_root->node); | ||
1919 | fail_sb_buffer: | 1934 | fail_sb_buffer: |
1920 | btrfs_stop_workers(&fs_info->fixup_workers); | 1935 | btrfs_stop_workers(&fs_info->fixup_workers); |
1921 | btrfs_stop_workers(&fs_info->delalloc_workers); | 1936 | btrfs_stop_workers(&fs_info->delalloc_workers); |
@@ -2005,6 +2020,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) | |||
2005 | return latest; | 2020 | return latest; |
2006 | } | 2021 | } |
2007 | 2022 | ||
2023 | /* | ||
2024 | * this should be called twice, once with wait == 0 and | ||
2025 | * once with wait == 1. When wait == 0 is done, all the buffer heads | ||
2026 | * we write are pinned. | ||
2027 | * | ||
2028 | * They are released when wait == 1 is done. | ||
2029 | * max_mirrors must be the same for both runs, and it indicates how | ||
2030 | * many supers on this one device should be written. | ||
2031 | * | ||
2032 | * max_mirrors == 0 means to write them all. | ||
2033 | */ | ||
2008 | static int write_dev_supers(struct btrfs_device *device, | 2034 | static int write_dev_supers(struct btrfs_device *device, |
2009 | struct btrfs_super_block *sb, | 2035 | struct btrfs_super_block *sb, |
2010 | int do_barriers, int wait, int max_mirrors) | 2036 | int do_barriers, int wait, int max_mirrors) |
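
The contract spelled out in the new comment implies a two-pass caller: submit every super with wait == 0 first, then make a second sweep with wait == 1 and the same max_mirrors to wait and unpin. The real loop lives in write_all_supers() (visible further down); the sketch below is only an illustrative caller honoring that contract:

/* illustrative two-pass driver; not the actual write_all_supers() body */
static int flush_supers(struct btrfs_device **devs, int ndevs,
			struct btrfs_super_block *sb,
			int do_barriers, int max_mirrors)
{
	int i, errors = 0;

	/* pass 1: submit the writes; every buffer head stays pinned */
	for (i = 0; i < ndevs; i++)
		if (write_dev_supers(devs[i], sb, do_barriers, 0, max_mirrors))
			errors++;

	/* pass 2: same max_mirrors; wait for I/O and drop the pins */
	for (i = 0; i < ndevs; i++)
		if (write_dev_supers(devs[i], sb, do_barriers, 1, max_mirrors))
			errors++;

	return errors ? -1 : 0;
}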
@@ -2040,12 +2066,16 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2040 | bh = __find_get_block(device->bdev, bytenr / 4096, | 2066 | bh = __find_get_block(device->bdev, bytenr / 4096, |
2041 | BTRFS_SUPER_INFO_SIZE); | 2067 | BTRFS_SUPER_INFO_SIZE); |
2042 | BUG_ON(!bh); | 2068 | BUG_ON(!bh); |
2043 | brelse(bh); | ||
2044 | wait_on_buffer(bh); | 2069 | wait_on_buffer(bh); |
2045 | if (buffer_uptodate(bh)) { | 2070 | if (!buffer_uptodate(bh)) |
2046 | brelse(bh); | 2071 | errors++; |
2047 | continue; | 2072 | |
2048 | } | 2073 | /* drop our reference */ |
2074 | brelse(bh); | ||
2075 | |||
2076 | /* drop the reference from the wait == 0 run */ | ||
2077 | brelse(bh); | ||
2078 | continue; | ||
2049 | } else { | 2079 | } else { |
2050 | btrfs_set_super_bytenr(sb, bytenr); | 2080 | btrfs_set_super_bytenr(sb, bytenr); |
2051 | 2081 | ||
@@ -2056,12 +2086,18 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2056 | BTRFS_CSUM_SIZE); | 2086 | BTRFS_CSUM_SIZE); |
2057 | btrfs_csum_final(crc, sb->csum); | 2087 | btrfs_csum_final(crc, sb->csum); |
2058 | 2088 | ||
2089 | /* | ||
2090 | * one reference for us, and we leave it for the | ||
2091 | * caller | ||
2092 | */ | ||
2059 | bh = __getblk(device->bdev, bytenr / 4096, | 2093 | bh = __getblk(device->bdev, bytenr / 4096, |
2060 | BTRFS_SUPER_INFO_SIZE); | 2094 | BTRFS_SUPER_INFO_SIZE); |
2061 | memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); | 2095 | memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); |
2062 | 2096 | ||
2063 | set_buffer_uptodate(bh); | 2097 | /* one reference for submit_bh */ |
2064 | get_bh(bh); | 2098 | get_bh(bh); |
2099 | |||
2100 | set_buffer_uptodate(bh); | ||
2065 | lock_buffer(bh); | 2101 | lock_buffer(bh); |
2066 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2102 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2067 | } | 2103 | } |
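
The reference counting in the two hunks above is easy to lose track of, so here is a toy trace of one super buffer across both passes. It models the comments in the patch: __getblk's reference is deliberately kept pinned until the wait == 1 pass, get_bh's extra reference is consumed by the write completion, and __find_get_block's reference is dropped alongside the pin.

/* toy refcount trace; bh is a stand-in, not struct buffer_head */
struct bh { int count; };

static void get_bh(struct bh *b)  { b->count++; }
static void brelse(struct bh *b)  { b->count--; }

static void trace_one_super(struct bh *b)
{
	/* pass 1 (wait == 0) */
	b->count = 1;	/* __getblk: held across passes on purpose   */
	get_bh(b);	/* extra ref for submit_bh / the completion  */
	brelse(b);	/* modeled inline: the end_io drops that ref */

	/* pass 2 (wait == 1) */
	get_bh(b);	/* __find_get_block takes its own reference    */
	brelse(b);	/* "drop our reference"                        */
	brelse(b);	/* "drop the reference from the wait == 0 run" */
	/* count is now 0: the buffer may be reclaimed */
}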
@@ -2073,6 +2109,7 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2073 | device->name); | 2109 | device->name); |
2074 | set_buffer_uptodate(bh); | 2110 | set_buffer_uptodate(bh); |
2075 | device->barriers = 0; | 2111 | device->barriers = 0; |
2112 | /* one reference for submit_bh */ | ||
2076 | get_bh(bh); | 2113 | get_bh(bh); |
2077 | lock_buffer(bh); | 2114 | lock_buffer(bh); |
2078 | ret = submit_bh(WRITE_SYNC, bh); | 2115 | ret = submit_bh(WRITE_SYNC, bh); |
@@ -2081,22 +2118,15 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2081 | ret = submit_bh(WRITE_SYNC, bh); | 2118 | ret = submit_bh(WRITE_SYNC, bh); |
2082 | } | 2119 | } |
2083 | 2120 | ||
2084 | if (!ret && wait) { | 2121 | if (ret) |
2085 | wait_on_buffer(bh); | ||
2086 | if (!buffer_uptodate(bh)) | ||
2087 | errors++; | ||
2088 | } else if (ret) { | ||
2089 | errors++; | 2122 | errors++; |
2090 | } | ||
2091 | if (wait) | ||
2092 | brelse(bh); | ||
2093 | } | 2123 | } |
2094 | return errors < i ? 0 : -1; | 2124 | return errors < i ? 0 : -1; |
2095 | } | 2125 | } |
2096 | 2126 | ||
2097 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 2127 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
2098 | { | 2128 | { |
2099 | struct list_head *head = &root->fs_info->fs_devices->devices; | 2129 | struct list_head *head; |
2100 | struct btrfs_device *dev; | 2130 | struct btrfs_device *dev; |
2101 | struct btrfs_super_block *sb; | 2131 | struct btrfs_super_block *sb; |
2102 | struct btrfs_dev_item *dev_item; | 2132 | struct btrfs_dev_item *dev_item; |
@@ -2111,6 +2141,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2111 | 2141 | ||
2112 | sb = &root->fs_info->super_for_commit; | 2142 | sb = &root->fs_info->super_for_commit; |
2113 | dev_item = &sb->dev_item; | 2143 | dev_item = &sb->dev_item; |
2144 | |||
2145 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
2146 | head = &root->fs_info->fs_devices->devices; | ||
2114 | list_for_each_entry(dev, head, dev_list) { | 2147 | list_for_each_entry(dev, head, dev_list) { |
2115 | if (!dev->bdev) { | 2148 | if (!dev->bdev) { |
2116 | total_errors++; | 2149 | total_errors++; |
@@ -2154,6 +2187,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2154 | if (ret) | 2187 | if (ret) |
2155 | total_errors++; | 2188 | total_errors++; |
2156 | } | 2189 | } |
2190 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
2157 | if (total_errors > max_errors) { | 2191 | if (total_errors > max_errors) { |
2158 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", | 2192 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", |
2159 | total_errors); | 2193 | total_errors); |
@@ -2173,6 +2207,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
2173 | 2207 | ||
2174 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2208 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
2175 | { | 2209 | { |
2210 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | ||
2176 | radix_tree_delete(&fs_info->fs_roots_radix, | 2211 | radix_tree_delete(&fs_info->fs_roots_radix, |
2177 | (unsigned long)root->root_key.objectid); | 2212 | (unsigned long)root->root_key.objectid); |
2178 | if (root->anon_super.s_dev) { | 2213 | if (root->anon_super.s_dev) { |
@@ -2219,10 +2254,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2219 | ARRAY_SIZE(gang)); | 2254 | ARRAY_SIZE(gang)); |
2220 | if (!ret) | 2255 | if (!ret) |
2221 | break; | 2256 | break; |
2257 | |||
2258 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | ||
2222 | for (i = 0; i < ret; i++) { | 2259 | for (i = 0; i < ret; i++) { |
2223 | root_objectid = gang[i]->root_key.objectid; | 2260 | root_objectid = gang[i]->root_key.objectid; |
2224 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 2261 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
2225 | root_objectid, gang[i]); | 2262 | root_objectid); |
2226 | BUG_ON(ret); | 2263 | BUG_ON(ret); |
2227 | btrfs_orphan_cleanup(gang[i]); | 2264 | btrfs_orphan_cleanup(gang[i]); |
2228 | } | 2265 | } |
@@ -2278,20 +2315,16 @@ int close_ctree(struct btrfs_root *root) | |||
2278 | (unsigned long long)fs_info->total_ref_cache_size); | 2315 | (unsigned long long)fs_info->total_ref_cache_size); |
2279 | } | 2316 | } |
2280 | 2317 | ||
2281 | if (fs_info->extent_root->node) | 2318 | free_extent_buffer(fs_info->extent_root->node); |
2282 | free_extent_buffer(fs_info->extent_root->node); | 2319 | free_extent_buffer(fs_info->extent_root->commit_root); |
2283 | 2320 | free_extent_buffer(fs_info->tree_root->node); | |
2284 | if (fs_info->tree_root->node) | 2321 | free_extent_buffer(fs_info->tree_root->commit_root); |
2285 | free_extent_buffer(fs_info->tree_root->node); | 2322 | free_extent_buffer(root->fs_info->chunk_root->node); |
2286 | 2323 | free_extent_buffer(root->fs_info->chunk_root->commit_root); | |
2287 | if (root->fs_info->chunk_root->node) | 2324 | free_extent_buffer(root->fs_info->dev_root->node); |
2288 | free_extent_buffer(root->fs_info->chunk_root->node); | 2325 | free_extent_buffer(root->fs_info->dev_root->commit_root); |
2289 | 2326 | free_extent_buffer(root->fs_info->csum_root->node); | |
2290 | if (root->fs_info->dev_root->node) | 2327 | free_extent_buffer(root->fs_info->csum_root->commit_root); |
2291 | free_extent_buffer(root->fs_info->dev_root->node); | ||
2292 | |||
2293 | if (root->fs_info->csum_root->node) | ||
2294 | free_extent_buffer(root->fs_info->csum_root->node); | ||
2295 | 2328 | ||
2296 | btrfs_free_block_groups(root->fs_info); | 2329 | btrfs_free_block_groups(root->fs_info); |
2297 | 2330 | ||
@@ -2373,17 +2406,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
2373 | * looks as though older kernels can get into trouble with | 2406 | * looks as though older kernels can get into trouble with |
2374 | * this code, they end up stuck in balance_dirty_pages forever | 2407 | * this code, they end up stuck in balance_dirty_pages forever |
2375 | */ | 2408 | */ |
2376 | struct extent_io_tree *tree; | ||
2377 | u64 num_dirty; | 2409 | u64 num_dirty; |
2378 | u64 start = 0; | ||
2379 | unsigned long thresh = 32 * 1024 * 1024; | 2410 | unsigned long thresh = 32 * 1024 * 1024; |
2380 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | ||
2381 | 2411 | ||
2382 | if (current->flags & PF_MEMALLOC) | 2412 | if (current->flags & PF_MEMALLOC) |
2383 | return; | 2413 | return; |
2384 | 2414 | ||
2385 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 2415 | num_dirty = root->fs_info->dirty_metadata_bytes; |
2386 | thresh, EXTENT_DIRTY); | 2416 | |
2387 | if (num_dirty > thresh) { | 2417 | if (num_dirty > thresh) { |
2388 | balance_dirty_pages_ratelimited_nr( | 2418 | balance_dirty_pages_ratelimited_nr( |
2389 | root->fs_info->btree_inode->i_mapping, 1); | 2419 | root->fs_info->btree_inode->i_mapping, 1); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 85315d2c90de..9596b40caa4e 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -78,7 +78,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
78 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 78 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
79 | key.offset = 0; | 79 | key.offset = 0; |
80 | 80 | ||
81 | inode = btrfs_iget(sb, &key, root, NULL); | 81 | inode = btrfs_iget(sb, &key, root); |
82 | if (IS_ERR(inode)) | 82 | if (IS_ERR(inode)) |
83 | return (void *)inode; | 83 | return (void *)inode; |
84 | 84 | ||
@@ -192,7 +192,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) | |||
192 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 192 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
193 | key.offset = 0; | 193 | key.offset = 0; |
194 | 194 | ||
195 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); | 195 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); |
196 | } | 196 | } |
197 | 197 | ||
198 | const struct export_operations btrfs_export_ops = { | 198 | const struct export_operations btrfs_export_ops = { |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 35af93355063..edc7d208c5ce 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -23,50 +23,39 @@ | |||
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include "compat.h" | 24 | #include "compat.h" |
25 | #include "hash.h" | 25 | #include "hash.h" |
26 | #include "crc32c.h" | ||
27 | #include "ctree.h" | 26 | #include "ctree.h" |
28 | #include "disk-io.h" | 27 | #include "disk-io.h" |
29 | #include "print-tree.h" | 28 | #include "print-tree.h" |
30 | #include "transaction.h" | 29 | #include "transaction.h" |
31 | #include "volumes.h" | 30 | #include "volumes.h" |
32 | #include "locking.h" | 31 | #include "locking.h" |
33 | #include "ref-cache.h" | ||
34 | #include "free-space-cache.h" | 32 | #include "free-space-cache.h" |
35 | 33 | ||
36 | #define PENDING_EXTENT_INSERT 0 | ||
37 | #define PENDING_EXTENT_DELETE 1 | ||
38 | #define PENDING_BACKREF_UPDATE 2 | ||
39 | |||
40 | struct pending_extent_op { | ||
41 | int type; | ||
42 | u64 bytenr; | ||
43 | u64 num_bytes; | ||
44 | u64 parent; | ||
45 | u64 orig_parent; | ||
46 | u64 generation; | ||
47 | u64 orig_generation; | ||
48 | int level; | ||
49 | struct list_head list; | ||
50 | int del; | ||
51 | }; | ||
52 | |||
53 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | ||
54 | struct btrfs_root *root, u64 parent, | ||
55 | u64 root_objectid, u64 ref_generation, | ||
56 | u64 owner, struct btrfs_key *ins, | ||
57 | int ref_mod); | ||
58 | static int update_reserved_extents(struct btrfs_root *root, | 34 | static int update_reserved_extents(struct btrfs_root *root, |
59 | u64 bytenr, u64 num, int reserve); | 35 | u64 bytenr, u64 num, int reserve); |
60 | static int update_block_group(struct btrfs_trans_handle *trans, | 36 | static int update_block_group(struct btrfs_trans_handle *trans, |
61 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
62 | u64 bytenr, u64 num_bytes, int alloc, | 38 | u64 bytenr, u64 num_bytes, int alloc, |
63 | int mark_free); | 39 | int mark_free); |
64 | static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 40 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
65 | struct btrfs_root *root, | 41 | struct btrfs_root *root, |
66 | u64 bytenr, u64 num_bytes, u64 parent, | 42 | u64 bytenr, u64 num_bytes, u64 parent, |
67 | u64 root_objectid, u64 ref_generation, | 43 | u64 root_objectid, u64 owner_objectid, |
68 | u64 owner_objectid, int pin, | 44 | u64 owner_offset, int refs_to_drop, |
69 | int ref_to_drop); | 45 | struct btrfs_delayed_extent_op *extra_op); |
46 | static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, | ||
47 | struct extent_buffer *leaf, | ||
48 | struct btrfs_extent_item *ei); | ||
49 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | ||
50 | struct btrfs_root *root, | ||
51 | u64 parent, u64 root_objectid, | ||
52 | u64 flags, u64 owner, u64 offset, | ||
53 | struct btrfs_key *ins, int ref_mod); | ||
54 | static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | ||
55 | struct btrfs_root *root, | ||
56 | u64 parent, u64 root_objectid, | ||
57 | u64 flags, struct btrfs_disk_key *key, | ||
58 | int level, struct btrfs_key *ins); | ||
70 | 59 | ||
71 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
72 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
@@ -453,199 +442,969 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
453 | * maintenance. This is actually the same as #2, but with a slightly | 442 | * maintenance. This is actually the same as #2, but with a slightly |
454 | * different use case. | 443 | * different use case. |
455 | * | 444 | * |
445 | * There are two kinds of back refs. Implicit back refs are optimized | ||
446 | * for pointers in non-shared tree blocks. For a given pointer in a block, | ||
447 | * back refs of this kind provide information about the block's owner tree | ||
448 | * and the pointer's key. This information allows us to find the block by | ||
449 | * b-tree searching. Full back refs are for pointers in tree blocks not | ||
450 | * referenced by their owner trees. The location of the tree block is recorded | ||
451 | * in the back refs. Actually full back refs are generic, and can be | ||
452 | * used in all cases where implicit back refs are used. The major shortcoming | ||
453 | * of full back refs is their overhead. Every time a tree block gets | ||
454 | * COWed, we have to update the back refs entries for all pointers in it. | ||
455 | * | ||
456 | * For a newly allocated tree block, we use implicit back refs for | ||
457 | * pointers in it. This means most tree-related operations only involve | ||
458 | * implicit back refs. For a tree block created in an old transaction, the | ||
459 | * only way to drop a reference to it is to COW it. So we can detect the | ||
460 | * event that a tree block loses its owner tree's reference and do the | ||
461 | * back refs conversion. | ||
462 | * | ||
463 | * When a tree block is COW'd through a tree, there are four cases: | ||
464 | * | ||
465 | * The reference count of the block is one and the tree is the block's | ||
466 | * owner tree. Nothing to do in this case. | ||
467 | * | ||
468 | * The reference count of the block is one and the tree is not the | ||
469 | * block's owner tree. In this case, full back refs are used for pointers | ||
470 | * in the block. Remove these full back refs and add implicit back refs for | ||
471 | * every pointer in the new block. | ||
472 | * | ||
473 | * The reference count of the block is greater than one and the tree is | ||
474 | * the block's owner tree. In this case, implicit back refs are used for | ||
475 | * pointers in the block. Add full back refs for every pointer in the | ||
476 | * block and increase the lower level extents' reference counts. The original | ||
477 | * implicit back refs are inherited by the new block. | ||
478 | * | ||
479 | * The reference count of the block is greater than one and the tree is | ||
480 | * not the block's owner tree. Add implicit back refs for every pointer in | ||
481 | * the new block and increase the lower level extents' reference counts. | ||
482 | * | ||
483 | * Back Reference Key composition: | ||
484 | * | ||
485 | * The key objectid corresponds to the first byte in the extent. | ||
486 | * The key type is used to differentiate between types of back refs. | ||
487 | * There are different meanings of the key offset for different types | ||
488 | * of back refs. | ||
489 | * | ||
456 | * File extents can be referenced by: | 490 | * File extents can be referenced by: |
457 | * | 491 | * |
458 | * - multiple snapshots, subvolumes, or different generations in one subvol | 492 | * - multiple snapshots, subvolumes, or different generations in one subvol |
459 | * - different files inside a single subvolume | 493 | * - different files inside a single subvolume |
460 | * - different offsets inside a file (bookend extents in file.c) | 494 | * - different offsets inside a file (bookend extents in file.c) |
461 | * | 495 | * |
462 | * The extent ref structure has fields for: | 496 | * The extent ref structure for the implicit back refs has fields for: |
463 | * | 497 | * |
464 | * - Objectid of the subvolume root | 498 | * - Objectid of the subvolume root |
465 | * - Generation number of the tree holding the reference | ||
466 | * - objectid of the file holding the reference | 499 | * - objectid of the file holding the reference |
467 | * - number of references holding by parent node (alway 1 for tree blocks) | 500 | * - original offset in the file |
468 | * | 501 | * - how many bookend extents |
469 | * Btree leaf may hold multiple references to a file extent. In most cases, | ||
470 | * these references are from same file and the corresponding offsets inside | ||
471 | * the file are close together. | ||
472 | * | ||
473 | * When a file extent is allocated the fields are filled in: | ||
474 | * (root_key.objectid, trans->transid, inode objectid, 1) | ||
475 | * | 502 | * |
476 | * When a leaf is cow'd new references are added for every file extent found | 503 | * The key offset for the implicit back refs is the hash of the first |
477 | * in the leaf. It looks similar to the create case, but trans->transid will | 504 | * three fields. |
478 | * be different when the block is cow'd. | ||
479 | * | 505 | * |
480 | * (root_key.objectid, trans->transid, inode objectid, | 506 | * The extent ref structure for the full back refs has a field for: |
481 | * number of references in the leaf) | ||
482 | * | 507 | * |
483 | * When a file extent is removed either during snapshot deletion or | 508 | * - number of pointers in the tree leaf |
484 | * file truncation, we find the corresponding back reference and check | ||
485 | * the following fields: | ||
486 | * | 509 | * |
487 | * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), | 510 | * The key offset for the full back refs is the first byte of |
488 | * inode objectid) | 511 | * the tree leaf. |
489 | * | 512 | * |
490 | * Btree extents can be referenced by: | 513 | * When a file extent is allocated, the implicit back refs are used; |
491 | * | 514 | * the fields are filled in: |
492 | * - Different subvolumes | ||
493 | * - Different generations of the same subvolume | ||
494 | * | ||
495 | * When a tree block is created, back references are inserted: | ||
496 | * | 515 | * |
497 | * (root->root_key.objectid, trans->transid, level, 1) | 516 | * (root_key.objectid, inode objectid, offset in file, 1) |
498 | * | 517 | * |
499 | * When a tree block is cow'd, new back references are added for all the | 518 | * When a file extent is removed during file truncation, we find the |
500 | * blocks it points to. If the tree block isn't in reference counted root, | 519 | * corresponding implicit back refs and check the following fields: |
501 | * the old back references are removed. These new back references are of | ||
502 | * the form (trans->transid will have increased since creation): | ||
503 | * | 520 | * |
504 | * (root->root_key.objectid, trans->transid, level, 1) | 521 | * (btrfs_header_owner(leaf), inode objectid, offset in file) |
505 | * | 522 | * |
506 | * When a backref is in deleting, the following fields are checked: | 523 | * Btree extents can be referenced by: |
507 | * | 524 | * |
508 | * if backref was for a tree root: | 525 | * - Different subvolumes |
509 | * (btrfs_header_owner(itself), btrfs_header_generation(itself), level) | ||
510 | * else | ||
511 | * (btrfs_header_owner(parent), btrfs_header_generation(parent), level) | ||
512 | * | 526 | * |
513 | * Back Reference Key composing: | 527 | * Both the implicit back refs and the full back refs for tree blocks |
528 | * consist only of a key. The key offset for the implicit back refs is | ||
529 | * the objectid of the block's owner tree. The key offset for the full | ||
530 | * back refs is the first byte of the parent block. | ||
514 | * | 531 | * |
515 | * The key objectid corresponds to the first byte in the extent, the key | 532 | * When implicit back refs are used, information about the lowest key and |
516 | * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first | 533 | * the level of the tree block is required. This information is stored in |
517 | * byte of parent extent. If a extent is tree root, the key offset is set | 534 | * the tree block info structure. |
518 | * to the key objectid. | ||
519 | */ | 535 | */ |
520 | 536 | ||
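
For concreteness, the four key shapes described in the comment above compose as follows (an illustrative sketch only; extent_start, leaf_bytenr and the other locals are hypothetical names, not code from this patch, and hash_extent_data_ref() is the helper defined later in this hunk):

    struct btrfs_key key;

    key.objectid = extent_start;            /* first byte of the extent */

    /* implicit back ref for a data extent */
    key.type   = BTRFS_EXTENT_DATA_REF_KEY;
    key.offset = hash_extent_data_ref(root_objectid, inode_objectid,
                                      file_offset);

    /* full back ref for a data extent */
    key.type   = BTRFS_SHARED_DATA_REF_KEY;
    key.offset = leaf_bytenr;               /* first byte of the referencing leaf */

    /* implicit back ref for a tree block */
    key.type   = BTRFS_TREE_BLOCK_REF_KEY;
    key.offset = root_objectid;             /* objectid of the owner tree */

    /* full back ref for a tree block */
    key.type   = BTRFS_SHARED_BLOCK_REF_KEY;
    key.offset = parent_bytenr;             /* first byte of the parent block */
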
521 | static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans, | 537 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
522 | struct btrfs_root *root, | 538 | static int convert_extent_item_v0(struct btrfs_trans_handle *trans, |
523 | struct btrfs_path *path, | 539 | struct btrfs_root *root, |
524 | u64 bytenr, u64 parent, | 540 | struct btrfs_path *path, |
525 | u64 ref_root, u64 ref_generation, | 541 | u64 owner, u32 extra_size) |
526 | u64 owner_objectid, int del) | ||
527 | { | 542 | { |
543 | struct btrfs_extent_item *item; | ||
544 | struct btrfs_extent_item_v0 *ei0; | ||
545 | struct btrfs_extent_ref_v0 *ref0; | ||
546 | struct btrfs_tree_block_info *bi; | ||
547 | struct extent_buffer *leaf; | ||
528 | struct btrfs_key key; | 548 | struct btrfs_key key; |
529 | struct btrfs_extent_ref *ref; | 549 | struct btrfs_key found_key; |
550 | u32 new_size = sizeof(*item); | ||
551 | u64 refs; | ||
552 | int ret; | ||
553 | |||
554 | leaf = path->nodes[0]; | ||
555 | BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); | ||
556 | |||
557 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
558 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
559 | struct btrfs_extent_item_v0); | ||
560 | refs = btrfs_extent_refs_v0(leaf, ei0); | ||
561 | |||
562 | if (owner == (u64)-1) { | ||
563 | while (1) { | ||
564 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||
565 | ret = btrfs_next_leaf(root, path); | ||
566 | if (ret < 0) | ||
567 | return ret; | ||
568 | BUG_ON(ret > 0); | ||
569 | leaf = path->nodes[0]; | ||
570 | } | ||
571 | btrfs_item_key_to_cpu(leaf, &found_key, | ||
572 | path->slots[0]); | ||
573 | BUG_ON(key.objectid != found_key.objectid); | ||
574 | if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { | ||
575 | path->slots[0]++; | ||
576 | continue; | ||
577 | } | ||
578 | ref0 = btrfs_item_ptr(leaf, path->slots[0], | ||
579 | struct btrfs_extent_ref_v0); | ||
580 | owner = btrfs_ref_objectid_v0(leaf, ref0); | ||
581 | break; | ||
582 | } | ||
583 | } | ||
584 | btrfs_release_path(root, path); | ||
585 | |||
586 | if (owner < BTRFS_FIRST_FREE_OBJECTID) | ||
587 | new_size += sizeof(*bi); | ||
588 | |||
589 | new_size -= sizeof(*ei0); | ||
590 | ret = btrfs_search_slot(trans, root, &key, path, | ||
591 | new_size + extra_size, 1); | ||
592 | if (ret < 0) | ||
593 | return ret; | ||
594 | BUG_ON(ret); | ||
595 | |||
596 | ret = btrfs_extend_item(trans, root, path, new_size); | ||
597 | BUG_ON(ret); | ||
598 | |||
599 | leaf = path->nodes[0]; | ||
600 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
601 | btrfs_set_extent_refs(leaf, item, refs); | ||
602 | /* FIXME: get real generation */ | ||
603 | btrfs_set_extent_generation(leaf, item, 0); | ||
604 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
605 | btrfs_set_extent_flags(leaf, item, | ||
606 | BTRFS_EXTENT_FLAG_TREE_BLOCK | | ||
607 | BTRFS_BLOCK_FLAG_FULL_BACKREF); | ||
608 | bi = (struct btrfs_tree_block_info *)(item + 1); | ||
609 | /* FIXME: get first key of the block */ | ||
610 | memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); | ||
611 | btrfs_set_tree_block_level(leaf, bi, (int)owner); | ||
612 | } else { | ||
613 | btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); | ||
614 | } | ||
615 | btrfs_mark_buffer_dirty(leaf); | ||
616 | return 0; | ||
617 | } | ||
618 | #endif | ||
619 | |||
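
For orientation, convert_extent_item_v0() above grows the extent item in place from the old disk format to the new one. The layouts involved look roughly like this (reconstructed from the accessors used in the function; the exact field widths are an assumption, see the ctree.h of this series):

    struct btrfs_extent_item_v0 {          /* old format: a bare refcount */
            __le32 refs;
    };

    struct btrfs_extent_item {             /* new format */
            __le64 refs;
            __le64 generation;
            __le64 flags;                  /* TREE_BLOCK / DATA / FULL_BACKREF */
    };

    struct btrfs_tree_block_info {         /* appended for tree blocks only */
            struct btrfs_disk_key key;     /* first key of the block */
            u8 level;
    };

This is what the new_size arithmetic in the function accounts for: sizeof(*item), plus sizeof(*bi) for tree blocks, minus sizeof(*ei0).
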
620 | static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) | ||
621 | { | ||
622 | u32 high_crc = ~(u32)0; | ||
623 | u32 low_crc = ~(u32)0; | ||
624 | __le64 lenum; | ||
625 | |||
626 | lenum = cpu_to_le64(root_objectid); | ||
627 | high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); | ||
628 | lenum = cpu_to_le64(owner); | ||
629 | low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); | ||
630 | lenum = cpu_to_le64(offset); | ||
631 | low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); | ||
632 | |||
633 | return ((u64)high_crc << 31) ^ (u64)low_crc; | ||
634 | } | ||
635 | |||
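
The result of hash_extent_data_ref() becomes the on-disk key offset of an implicit data back ref, so the exact function is effectively part of the disk format. Note that the high half is shifted by 31 rather than 32 bits, so the two crc32c halves overlap in one bit. Usage is simply (the triple below is made up for illustration, not from the patch):

    /* key offset for an implicit data back ref of root 5, inode 257,
     * file offset 0 -- illustrative values only */
    u64 offset = hash_extent_data_ref(5, 257, 0);

Different (root, objectid, offset) triples can collide on the same offset; see the probing in insert_extent_data_ref() below.
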
636 | static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, | ||
637 | struct btrfs_extent_data_ref *ref) | ||
638 | { | ||
639 | return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), | ||
640 | btrfs_extent_data_ref_objectid(leaf, ref), | ||
641 | btrfs_extent_data_ref_offset(leaf, ref)); | ||
642 | } | ||
643 | |||
644 | static int match_extent_data_ref(struct extent_buffer *leaf, | ||
645 | struct btrfs_extent_data_ref *ref, | ||
646 | u64 root_objectid, u64 owner, u64 offset) | ||
647 | { | ||
648 | if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || | ||
649 | btrfs_extent_data_ref_objectid(leaf, ref) != owner || | ||
650 | btrfs_extent_data_ref_offset(leaf, ref) != offset) | ||
651 | return 0; | ||
652 | return 1; | ||
653 | } | ||
654 | |||
655 | static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, | ||
656 | struct btrfs_root *root, | ||
657 | struct btrfs_path *path, | ||
658 | u64 bytenr, u64 parent, | ||
659 | u64 root_objectid, | ||
660 | u64 owner, u64 offset) | ||
661 | { | ||
662 | struct btrfs_key key; | ||
663 | struct btrfs_extent_data_ref *ref; | ||
530 | struct extent_buffer *leaf; | 664 | struct extent_buffer *leaf; |
531 | u64 ref_objectid; | 665 | u32 nritems; |
532 | int ret; | 666 | int ret; |
667 | int recow; | ||
668 | int err = -ENOENT; | ||
533 | 669 | ||
534 | key.objectid = bytenr; | 670 | key.objectid = bytenr; |
535 | key.type = BTRFS_EXTENT_REF_KEY; | 671 | if (parent) { |
536 | key.offset = parent; | 672 | key.type = BTRFS_SHARED_DATA_REF_KEY; |
673 | key.offset = parent; | ||
674 | } else { | ||
675 | key.type = BTRFS_EXTENT_DATA_REF_KEY; | ||
676 | key.offset = hash_extent_data_ref(root_objectid, | ||
677 | owner, offset); | ||
678 | } | ||
679 | again: | ||
680 | recow = 0; | ||
681 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
682 | if (ret < 0) { | ||
683 | err = ret; | ||
684 | goto fail; | ||
685 | } | ||
537 | 686 | ||
538 | ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1); | 687 | if (parent) { |
539 | if (ret < 0) | 688 | if (!ret) |
540 | goto out; | 689 | return 0; |
541 | if (ret > 0) { | 690 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
542 | ret = -ENOENT; | 691 | key.type = BTRFS_EXTENT_REF_V0_KEY; |
543 | goto out; | 692 | btrfs_release_path(root, path); |
693 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
694 | if (ret < 0) { | ||
695 | err = ret; | ||
696 | goto fail; | ||
697 | } | ||
698 | if (!ret) | ||
699 | return 0; | ||
700 | #endif | ||
701 | goto fail; | ||
544 | } | 702 | } |
545 | 703 | ||
546 | leaf = path->nodes[0]; | 704 | leaf = path->nodes[0]; |
547 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | 705 | nritems = btrfs_header_nritems(leaf); |
548 | ref_objectid = btrfs_ref_objectid(leaf, ref); | 706 | while (1) { |
549 | if (btrfs_ref_root(leaf, ref) != ref_root || | 707 | if (path->slots[0] >= nritems) { |
550 | btrfs_ref_generation(leaf, ref) != ref_generation || | 708 | ret = btrfs_next_leaf(root, path); |
551 | (ref_objectid != owner_objectid && | 709 | if (ret < 0) |
552 | ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { | 710 | err = ret; |
553 | ret = -EIO; | 711 | if (ret) |
554 | WARN_ON(1); | 712 | goto fail; |
555 | goto out; | 713 | |
714 | leaf = path->nodes[0]; | ||
715 | nritems = btrfs_header_nritems(leaf); | ||
716 | recow = 1; | ||
717 | } | ||
718 | |||
719 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
720 | if (key.objectid != bytenr || | ||
721 | key.type != BTRFS_EXTENT_DATA_REF_KEY) | ||
722 | goto fail; | ||
723 | |||
724 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
725 | struct btrfs_extent_data_ref); | ||
726 | |||
727 | if (match_extent_data_ref(leaf, ref, root_objectid, | ||
728 | owner, offset)) { | ||
729 | if (recow) { | ||
730 | btrfs_release_path(root, path); | ||
731 | goto again; | ||
732 | } | ||
733 | err = 0; | ||
734 | break; | ||
735 | } | ||
736 | path->slots[0]++; | ||
556 | } | 737 | } |
557 | ret = 0; | 738 | fail: |
558 | out: | 739 | return err; |
559 | return ret; | ||
560 | } | 740 | } |
561 | 741 | ||
562 | static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | 742 | static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, |
563 | struct btrfs_root *root, | 743 | struct btrfs_root *root, |
564 | struct btrfs_path *path, | 744 | struct btrfs_path *path, |
565 | u64 bytenr, u64 parent, | 745 | u64 bytenr, u64 parent, |
566 | u64 ref_root, u64 ref_generation, | 746 | u64 root_objectid, u64 owner, |
567 | u64 owner_objectid, | 747 | u64 offset, int refs_to_add) |
568 | int refs_to_add) | ||
569 | { | 748 | { |
570 | struct btrfs_key key; | 749 | struct btrfs_key key; |
571 | struct extent_buffer *leaf; | 750 | struct extent_buffer *leaf; |
572 | struct btrfs_extent_ref *ref; | 751 | u32 size; |
573 | u32 num_refs; | 752 | u32 num_refs; |
574 | int ret; | 753 | int ret; |
575 | 754 | ||
576 | key.objectid = bytenr; | 755 | key.objectid = bytenr; |
577 | key.type = BTRFS_EXTENT_REF_KEY; | 756 | if (parent) { |
578 | key.offset = parent; | 757 | key.type = BTRFS_SHARED_DATA_REF_KEY; |
758 | key.offset = parent; | ||
759 | size = sizeof(struct btrfs_shared_data_ref); | ||
760 | } else { | ||
761 | key.type = BTRFS_EXTENT_DATA_REF_KEY; | ||
762 | key.offset = hash_extent_data_ref(root_objectid, | ||
763 | owner, offset); | ||
764 | size = sizeof(struct btrfs_extent_data_ref); | ||
765 | } | ||
579 | 766 | ||
580 | ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); | 767 | ret = btrfs_insert_empty_item(trans, root, path, &key, size); |
581 | if (ret == 0) { | 768 | if (ret && ret != -EEXIST) |
582 | leaf = path->nodes[0]; | 769 | goto fail; |
583 | ref = btrfs_item_ptr(leaf, path->slots[0], | 770 | |
584 | struct btrfs_extent_ref); | 771 | leaf = path->nodes[0]; |
585 | btrfs_set_ref_root(leaf, ref, ref_root); | 772 | if (parent) { |
586 | btrfs_set_ref_generation(leaf, ref, ref_generation); | 773 | struct btrfs_shared_data_ref *ref; |
587 | btrfs_set_ref_objectid(leaf, ref, owner_objectid); | ||
588 | btrfs_set_ref_num_refs(leaf, ref, refs_to_add); | ||
589 | } else if (ret == -EEXIST) { | ||
590 | u64 existing_owner; | ||
591 | |||
592 | BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); | ||
593 | leaf = path->nodes[0]; | ||
594 | ref = btrfs_item_ptr(leaf, path->slots[0], | 774 | ref = btrfs_item_ptr(leaf, path->slots[0], |
595 | struct btrfs_extent_ref); | 775 | struct btrfs_shared_data_ref); |
596 | if (btrfs_ref_root(leaf, ref) != ref_root || | 776 | if (ret == 0) { |
597 | btrfs_ref_generation(leaf, ref) != ref_generation) { | 777 | btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); |
598 | ret = -EIO; | 778 | } else { |
599 | WARN_ON(1); | 779 | num_refs = btrfs_shared_data_ref_count(leaf, ref); |
600 | goto out; | 780 | num_refs += refs_to_add; |
781 | btrfs_set_shared_data_ref_count(leaf, ref, num_refs); | ||
601 | } | 782 | } |
783 | } else { | ||
784 | struct btrfs_extent_data_ref *ref; | ||
785 | while (ret == -EEXIST) { | ||
786 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
787 | struct btrfs_extent_data_ref); | ||
788 | if (match_extent_data_ref(leaf, ref, root_objectid, | ||
789 | owner, offset)) | ||
790 | break; | ||
791 | btrfs_release_path(root, path); | ||
792 | key.offset++; | ||
793 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
794 | size); | ||
795 | if (ret && ret != -EEXIST) | ||
796 | goto fail; | ||
602 | 797 | ||
603 | num_refs = btrfs_ref_num_refs(leaf, ref); | 798 | leaf = path->nodes[0]; |
604 | BUG_ON(num_refs == 0); | 799 | } |
605 | btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add); | 800 | ref = btrfs_item_ptr(leaf, path->slots[0], |
606 | 801 | struct btrfs_extent_data_ref); | |
607 | existing_owner = btrfs_ref_objectid(leaf, ref); | 802 | if (ret == 0) { |
608 | if (existing_owner != owner_objectid && | 803 | btrfs_set_extent_data_ref_root(leaf, ref, |
609 | existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { | 804 | root_objectid); |
610 | btrfs_set_ref_objectid(leaf, ref, | 805 | btrfs_set_extent_data_ref_objectid(leaf, ref, owner); |
611 | BTRFS_MULTIPLE_OBJECTIDS); | 806 | btrfs_set_extent_data_ref_offset(leaf, ref, offset); |
807 | btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); | ||
808 | } else { | ||
809 | num_refs = btrfs_extent_data_ref_count(leaf, ref); | ||
810 | num_refs += refs_to_add; | ||
811 | btrfs_set_extent_data_ref_count(leaf, ref, num_refs); | ||
612 | } | 812 | } |
613 | ret = 0; | ||
614 | } else { | ||
615 | goto out; | ||
616 | } | 813 | } |
617 | btrfs_unlock_up_safe(path, 1); | 814 | btrfs_mark_buffer_dirty(leaf); |
618 | btrfs_mark_buffer_dirty(path->nodes[0]); | 815 | ret = 0; |
619 | out: | 816 | fail: |
620 | btrfs_release_path(root, path); | 817 | btrfs_release_path(root, path); |
621 | return ret; | 818 | return ret; |
622 | } | 819 | } |
623 | 820 | ||
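
Because the implicit data ref key offset is a hash, insert_extent_data_ref() above has to handle collisions: the -EEXIST loop bumps key.offset and retries until it finds either the matching ref or a free slot. The same idea in miniature, as a self-contained toy (not btrfs code):

    #include <stdint.h>
    #include <stdbool.h>

    struct toy_ref { uint64_t root, owner, offset; uint32_t count; };

    #define TOY_SLOTS 64
    static struct toy_ref toy_table[TOY_SLOTS];   /* count == 0 means empty */

    static bool toy_match(const struct toy_ref *a, const struct toy_ref *b)
    {
            return a->root == b->root && a->owner == b->owner &&
                   a->offset == b->offset;
    }

    /* insert a ref at its hash slot, probing forward on collision --
     * the moral equivalent of the key.offset++ loop above */
    static void toy_insert(struct toy_ref r, uint64_t hash)
    {
            uint64_t slot = hash % TOY_SLOTS;

            while (toy_table[slot].count && !toy_match(&toy_table[slot], &r))
                    slot = (slot + 1) % TOY_SLOTS;
            if (toy_table[slot].count)
                    toy_table[slot].count += r.count;  /* existing ref: bump */
            else
                    toy_table[slot] = r;               /* brand new ref */
    }
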
624 | static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, | 821 | static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, |
625 | struct btrfs_root *root, | 822 | struct btrfs_root *root, |
626 | struct btrfs_path *path, | 823 | struct btrfs_path *path, |
627 | int refs_to_drop) | 824 | int refs_to_drop) |
628 | { | 825 | { |
826 | struct btrfs_key key; | ||
827 | struct btrfs_extent_data_ref *ref1 = NULL; | ||
828 | struct btrfs_shared_data_ref *ref2 = NULL; | ||
629 | struct extent_buffer *leaf; | 829 | struct extent_buffer *leaf; |
630 | struct btrfs_extent_ref *ref; | 830 | u32 num_refs = 0; |
631 | u32 num_refs; | ||
632 | int ret = 0; | 831 | int ret = 0; |
633 | 832 | ||
634 | leaf = path->nodes[0]; | 833 | leaf = path->nodes[0]; |
635 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | 834 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
636 | num_refs = btrfs_ref_num_refs(leaf, ref); | 835 | |
836 | if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
837 | ref1 = btrfs_item_ptr(leaf, path->slots[0], | ||
838 | struct btrfs_extent_data_ref); | ||
839 | num_refs = btrfs_extent_data_ref_count(leaf, ref1); | ||
840 | } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { | ||
841 | ref2 = btrfs_item_ptr(leaf, path->slots[0], | ||
842 | struct btrfs_shared_data_ref); | ||
843 | num_refs = btrfs_shared_data_ref_count(leaf, ref2); | ||
844 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
845 | } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
846 | struct btrfs_extent_ref_v0 *ref0; | ||
847 | ref0 = btrfs_item_ptr(leaf, path->slots[0], | ||
848 | struct btrfs_extent_ref_v0); | ||
849 | num_refs = btrfs_ref_count_v0(leaf, ref0); | ||
850 | #endif | ||
851 | } else { | ||
852 | BUG(); | ||
853 | } | ||
854 | |||
637 | BUG_ON(num_refs < refs_to_drop); | 855 | BUG_ON(num_refs < refs_to_drop); |
638 | num_refs -= refs_to_drop; | 856 | num_refs -= refs_to_drop; |
857 | |||
639 | if (num_refs == 0) { | 858 | if (num_refs == 0) { |
640 | ret = btrfs_del_item(trans, root, path); | 859 | ret = btrfs_del_item(trans, root, path); |
641 | } else { | 860 | } else { |
642 | btrfs_set_ref_num_refs(leaf, ref, num_refs); | 861 | if (key.type == BTRFS_EXTENT_DATA_REF_KEY) |
862 | btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); | ||
863 | else if (key.type == BTRFS_SHARED_DATA_REF_KEY) | ||
864 | btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); | ||
865 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
866 | else { | ||
867 | struct btrfs_extent_ref_v0 *ref0; | ||
868 | ref0 = btrfs_item_ptr(leaf, path->slots[0], | ||
869 | struct btrfs_extent_ref_v0); | ||
870 | btrfs_set_ref_count_v0(leaf, ref0, num_refs); | ||
871 | } | ||
872 | #endif | ||
643 | btrfs_mark_buffer_dirty(leaf); | 873 | btrfs_mark_buffer_dirty(leaf); |
644 | } | 874 | } |
875 | return ret; | ||
876 | } | ||
877 | |||
878 | static noinline u32 extent_data_ref_count(struct btrfs_root *root, | ||
879 | struct btrfs_path *path, | ||
880 | struct btrfs_extent_inline_ref *iref) | ||
881 | { | ||
882 | struct btrfs_key key; | ||
883 | struct extent_buffer *leaf; | ||
884 | struct btrfs_extent_data_ref *ref1; | ||
885 | struct btrfs_shared_data_ref *ref2; | ||
886 | u32 num_refs = 0; | ||
887 | |||
888 | leaf = path->nodes[0]; | ||
889 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
890 | if (iref) { | ||
891 | if (btrfs_extent_inline_ref_type(leaf, iref) == | ||
892 | BTRFS_EXTENT_DATA_REF_KEY) { | ||
893 | ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
894 | num_refs = btrfs_extent_data_ref_count(leaf, ref1); | ||
895 | } else { | ||
896 | ref2 = (struct btrfs_shared_data_ref *)(iref + 1); | ||
897 | num_refs = btrfs_shared_data_ref_count(leaf, ref2); | ||
898 | } | ||
899 | } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
900 | ref1 = btrfs_item_ptr(leaf, path->slots[0], | ||
901 | struct btrfs_extent_data_ref); | ||
902 | num_refs = btrfs_extent_data_ref_count(leaf, ref1); | ||
903 | } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { | ||
904 | ref2 = btrfs_item_ptr(leaf, path->slots[0], | ||
905 | struct btrfs_shared_data_ref); | ||
906 | num_refs = btrfs_shared_data_ref_count(leaf, ref2); | ||
907 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
908 | } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
909 | struct btrfs_extent_ref_v0 *ref0; | ||
910 | ref0 = btrfs_item_ptr(leaf, path->slots[0], | ||
911 | struct btrfs_extent_ref_v0); | ||
912 | num_refs = btrfs_ref_count_v0(leaf, ref0); | ||
913 | #endif | ||
914 | } else { | ||
915 | WARN_ON(1); | ||
916 | } | ||
917 | return num_refs; | ||
918 | } | ||
919 | |||
920 | static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, | ||
921 | struct btrfs_root *root, | ||
922 | struct btrfs_path *path, | ||
923 | u64 bytenr, u64 parent, | ||
924 | u64 root_objectid) | ||
925 | { | ||
926 | struct btrfs_key key; | ||
927 | int ret; | ||
928 | |||
929 | key.objectid = bytenr; | ||
930 | if (parent) { | ||
931 | key.type = BTRFS_SHARED_BLOCK_REF_KEY; | ||
932 | key.offset = parent; | ||
933 | } else { | ||
934 | key.type = BTRFS_TREE_BLOCK_REF_KEY; | ||
935 | key.offset = root_objectid; | ||
936 | } | ||
937 | |||
938 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
939 | if (ret > 0) | ||
940 | ret = -ENOENT; | ||
941 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
942 | if (ret == -ENOENT && parent) { | ||
943 | btrfs_release_path(root, path); | ||
944 | key.type = BTRFS_EXTENT_REF_V0_KEY; | ||
945 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
946 | if (ret > 0) | ||
947 | ret = -ENOENT; | ||
948 | } | ||
949 | #endif | ||
950 | return ret; | ||
951 | } | ||
952 | |||
953 | static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, | ||
954 | struct btrfs_root *root, | ||
955 | struct btrfs_path *path, | ||
956 | u64 bytenr, u64 parent, | ||
957 | u64 root_objectid) | ||
958 | { | ||
959 | struct btrfs_key key; | ||
960 | int ret; | ||
961 | |||
962 | key.objectid = bytenr; | ||
963 | if (parent) { | ||
964 | key.type = BTRFS_SHARED_BLOCK_REF_KEY; | ||
965 | key.offset = parent; | ||
966 | } else { | ||
967 | key.type = BTRFS_TREE_BLOCK_REF_KEY; | ||
968 | key.offset = root_objectid; | ||
969 | } | ||
970 | |||
971 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); | ||
645 | btrfs_release_path(root, path); | 972 | btrfs_release_path(root, path); |
646 | return ret; | 973 | return ret; |
647 | } | 974 | } |
648 | 975 | ||
976 | static inline int extent_ref_type(u64 parent, u64 owner) | ||
977 | { | ||
978 | int type; | ||
979 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
980 | if (parent > 0) | ||
981 | type = BTRFS_SHARED_BLOCK_REF_KEY; | ||
982 | else | ||
983 | type = BTRFS_TREE_BLOCK_REF_KEY; | ||
984 | } else { | ||
985 | if (parent > 0) | ||
986 | type = BTRFS_SHARED_DATA_REF_KEY; | ||
987 | else | ||
988 | type = BTRFS_EXTENT_DATA_REF_KEY; | ||
989 | } | ||
990 | return type; | ||
991 | } | ||
992 | |||
993 | static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) | ||
994 | |||
995 | { | ||
996 | int level; | ||
997 | BUG_ON(!path->keep_locks); | ||
998 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
999 | if (!path->nodes[level]) | ||
1000 | break; | ||
1001 | btrfs_assert_tree_locked(path->nodes[level]); | ||
1002 | if (path->slots[level] + 1 >= | ||
1003 | btrfs_header_nritems(path->nodes[level])) | ||
1004 | continue; | ||
1005 | if (level == 0) | ||
1006 | btrfs_item_key_to_cpu(path->nodes[level], key, | ||
1007 | path->slots[level] + 1); | ||
1008 | else | ||
1009 | btrfs_node_key_to_cpu(path->nodes[level], key, | ||
1010 | path->slots[level] + 1); | ||
1011 | return 0; | ||
1012 | } | ||
1013 | return 1; | ||
1014 | } | ||
1015 | |||
1016 | /* | ||
1017 | * Look for an inline back ref. If the back ref is found, *ref_ret is set | ||
1018 | * to the address of the inline back ref, and 0 is returned. | ||
1019 | * | ||
1020 | * If the back ref isn't found, *ref_ret is set to the address where it | ||
1021 | * should be inserted, and -ENOENT is returned. | ||
1022 | * | ||
1023 | * If insert is true and there are too many inline back refs, the path | ||
1024 | * points to the extent item, and -EAGAIN is returned. | ||
1025 | * | ||
1026 | * NOTE: inline back refs are ordered in the same way that back ref | ||
1027 | * items in the tree are ordered. | ||
1028 | */ | ||
1029 | static noinline_for_stack | ||
1030 | int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, | ||
1031 | struct btrfs_root *root, | ||
1032 | struct btrfs_path *path, | ||
1033 | struct btrfs_extent_inline_ref **ref_ret, | ||
1034 | u64 bytenr, u64 num_bytes, | ||
1035 | u64 parent, u64 root_objectid, | ||
1036 | u64 owner, u64 offset, int insert) | ||
1037 | { | ||
1038 | struct btrfs_key key; | ||
1039 | struct extent_buffer *leaf; | ||
1040 | struct btrfs_extent_item *ei; | ||
1041 | struct btrfs_extent_inline_ref *iref; | ||
1042 | u64 flags; | ||
1043 | u64 item_size; | ||
1044 | unsigned long ptr; | ||
1045 | unsigned long end; | ||
1046 | int extra_size; | ||
1047 | int type; | ||
1048 | int want; | ||
1049 | int ret; | ||
1050 | int err = 0; | ||
1051 | |||
1052 | key.objectid = bytenr; | ||
1053 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
1054 | key.offset = num_bytes; | ||
1055 | |||
1056 | want = extent_ref_type(parent, owner); | ||
1057 | if (insert) { | ||
1058 | extra_size = btrfs_extent_inline_ref_size(want); | ||
1059 | path->keep_locks = 1; | ||
1060 | } else | ||
1061 | extra_size = -1; | ||
1062 | ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); | ||
1063 | if (ret < 0) { | ||
1064 | err = ret; | ||
1065 | goto out; | ||
1066 | } | ||
1067 | BUG_ON(ret); | ||
1068 | |||
1069 | leaf = path->nodes[0]; | ||
1070 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
1071 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
1072 | if (item_size < sizeof(*ei)) { | ||
1073 | if (!insert) { | ||
1074 | err = -ENOENT; | ||
1075 | goto out; | ||
1076 | } | ||
1077 | ret = convert_extent_item_v0(trans, root, path, owner, | ||
1078 | extra_size); | ||
1079 | if (ret < 0) { | ||
1080 | err = ret; | ||
1081 | goto out; | ||
1082 | } | ||
1083 | leaf = path->nodes[0]; | ||
1084 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
1085 | } | ||
1086 | #endif | ||
1087 | BUG_ON(item_size < sizeof(*ei)); | ||
1088 | |||
1089 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
1090 | flags = btrfs_extent_flags(leaf, ei); | ||
1091 | |||
1092 | ptr = (unsigned long)(ei + 1); | ||
1093 | end = (unsigned long)ei + item_size; | ||
1094 | |||
1095 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
1096 | ptr += sizeof(struct btrfs_tree_block_info); | ||
1097 | BUG_ON(ptr > end); | ||
1098 | } else { | ||
1099 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); | ||
1100 | } | ||
1101 | |||
1102 | err = -ENOENT; | ||
1103 | while (1) { | ||
1104 | if (ptr >= end) { | ||
1105 | WARN_ON(ptr > end); | ||
1106 | break; | ||
1107 | } | ||
1108 | iref = (struct btrfs_extent_inline_ref *)ptr; | ||
1109 | type = btrfs_extent_inline_ref_type(leaf, iref); | ||
1110 | if (want < type) | ||
1111 | break; | ||
1112 | if (want > type) { | ||
1113 | ptr += btrfs_extent_inline_ref_size(type); | ||
1114 | continue; | ||
1115 | } | ||
1116 | |||
1117 | if (type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
1118 | struct btrfs_extent_data_ref *dref; | ||
1119 | dref = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
1120 | if (match_extent_data_ref(leaf, dref, root_objectid, | ||
1121 | owner, offset)) { | ||
1122 | err = 0; | ||
1123 | break; | ||
1124 | } | ||
1125 | if (hash_extent_data_ref_item(leaf, dref) < | ||
1126 | hash_extent_data_ref(root_objectid, owner, offset)) | ||
1127 | break; | ||
1128 | } else { | ||
1129 | u64 ref_offset; | ||
1130 | ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); | ||
1131 | if (parent > 0) { | ||
1132 | if (parent == ref_offset) { | ||
1133 | err = 0; | ||
1134 | break; | ||
1135 | } | ||
1136 | if (ref_offset < parent) | ||
1137 | break; | ||
1138 | } else { | ||
1139 | if (root_objectid == ref_offset) { | ||
1140 | err = 0; | ||
1141 | break; | ||
1142 | } | ||
1143 | if (ref_offset < root_objectid) | ||
1144 | break; | ||
1145 | } | ||
1146 | } | ||
1147 | ptr += btrfs_extent_inline_ref_size(type); | ||
1148 | } | ||
1149 | if (err == -ENOENT && insert) { | ||
1150 | if (item_size + extra_size >= | ||
1151 | BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { | ||
1152 | err = -EAGAIN; | ||
1153 | goto out; | ||
1154 | } | ||
1155 | /* | ||
1156 | * To add a new inline back ref, we have to make sure | ||
1157 | * there is no corresponding back ref item. | ||
1158 | * For simplicity, we just do not add a new inline back | ||
1159 | * ref if there is any kind of item for this block. | ||
1160 | */ | ||
1161 | if (find_next_key(path, &key) == 0 && key.objectid == bytenr && | ||
1162 | key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { | ||
1163 | err = -EAGAIN; | ||
1164 | goto out; | ||
1165 | } | ||
1166 | } | ||
1167 | *ref_ret = (struct btrfs_extent_inline_ref *)ptr; | ||
1168 | out: | ||
1169 | if (insert) { | ||
1170 | path->keep_locks = 0; | ||
1171 | btrfs_unlock_up_safe(path, 1); | ||
1172 | } | ||
1173 | return err; | ||
1174 | } | ||
1175 | |||
1176 | /* | ||
1177 | * helper to add a new inline back ref | ||
1178 | */ | ||
1179 | static noinline_for_stack | ||
1180 | int setup_inline_extent_backref(struct btrfs_trans_handle *trans, | ||
1181 | struct btrfs_root *root, | ||
1182 | struct btrfs_path *path, | ||
1183 | struct btrfs_extent_inline_ref *iref, | ||
1184 | u64 parent, u64 root_objectid, | ||
1185 | u64 owner, u64 offset, int refs_to_add, | ||
1186 | struct btrfs_delayed_extent_op *extent_op) | ||
1187 | { | ||
1188 | struct extent_buffer *leaf; | ||
1189 | struct btrfs_extent_item *ei; | ||
1190 | unsigned long ptr; | ||
1191 | unsigned long end; | ||
1192 | unsigned long item_offset; | ||
1193 | u64 refs; | ||
1194 | int size; | ||
1195 | int type; | ||
1196 | int ret; | ||
1197 | |||
1198 | leaf = path->nodes[0]; | ||
1199 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
1200 | item_offset = (unsigned long)iref - (unsigned long)ei; | ||
1201 | |||
1202 | type = extent_ref_type(parent, owner); | ||
1203 | size = btrfs_extent_inline_ref_size(type); | ||
1204 | |||
1205 | ret = btrfs_extend_item(trans, root, path, size); | ||
1206 | BUG_ON(ret); | ||
1207 | |||
1208 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
1209 | refs = btrfs_extent_refs(leaf, ei); | ||
1210 | refs += refs_to_add; | ||
1211 | btrfs_set_extent_refs(leaf, ei, refs); | ||
1212 | if (extent_op) | ||
1213 | __run_delayed_extent_op(extent_op, leaf, ei); | ||
1214 | |||
1215 | ptr = (unsigned long)ei + item_offset; | ||
1216 | end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); | ||
1217 | if (ptr < end - size) | ||
1218 | memmove_extent_buffer(leaf, ptr + size, ptr, | ||
1219 | end - size - ptr); | ||
1220 | |||
1221 | iref = (struct btrfs_extent_inline_ref *)ptr; | ||
1222 | btrfs_set_extent_inline_ref_type(leaf, iref, type); | ||
1223 | if (type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
1224 | struct btrfs_extent_data_ref *dref; | ||
1225 | dref = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
1226 | btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); | ||
1227 | btrfs_set_extent_data_ref_objectid(leaf, dref, owner); | ||
1228 | btrfs_set_extent_data_ref_offset(leaf, dref, offset); | ||
1229 | btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); | ||
1230 | } else if (type == BTRFS_SHARED_DATA_REF_KEY) { | ||
1231 | struct btrfs_shared_data_ref *sref; | ||
1232 | sref = (struct btrfs_shared_data_ref *)(iref + 1); | ||
1233 | btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); | ||
1234 | btrfs_set_extent_inline_ref_offset(leaf, iref, parent); | ||
1235 | } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { | ||
1236 | btrfs_set_extent_inline_ref_offset(leaf, iref, parent); | ||
1237 | } else { | ||
1238 | btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); | ||
1239 | } | ||
1240 | btrfs_mark_buffer_dirty(leaf); | ||
1241 | return 0; | ||
1242 | } | ||
1243 | |||
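
setup_inline_extent_backref() above opens a gap for the new inline ref by first extending the item and then sliding the bytes after the insertion point towards the end. The byte shuffling in isolation, written as plain C instead of the extent-buffer helpers (a sketch, not patch code):

    #include <string.h>

    /* open a 'size'-byte gap at 'insert_off' inside an item whose payload
     * was 'old_size' bytes; the caller must already have grown the item,
     * just as btrfs_extend_item() does in the function above */
    static void open_gap(char *item, size_t old_size,
                         size_t insert_off, size_t size)
    {
            memmove(item + insert_off + size, item + insert_off,
                    old_size - insert_off);
    }

update_inline_extent_backref() further down performs the inverse when a ref count drops to zero: memmove the tail down over the dead ref, then truncate the item.
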
1244 | static int lookup_extent_backref(struct btrfs_trans_handle *trans, | ||
1245 | struct btrfs_root *root, | ||
1246 | struct btrfs_path *path, | ||
1247 | struct btrfs_extent_inline_ref **ref_ret, | ||
1248 | u64 bytenr, u64 num_bytes, u64 parent, | ||
1249 | u64 root_objectid, u64 owner, u64 offset) | ||
1250 | { | ||
1251 | int ret; | ||
1252 | |||
1253 | ret = lookup_inline_extent_backref(trans, root, path, ref_ret, | ||
1254 | bytenr, num_bytes, parent, | ||
1255 | root_objectid, owner, offset, 0); | ||
1256 | if (ret != -ENOENT) | ||
1257 | return ret; | ||
1258 | |||
1259 | btrfs_release_path(root, path); | ||
1260 | *ref_ret = NULL; | ||
1261 | |||
1262 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
1263 | ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, | ||
1264 | root_objectid); | ||
1265 | } else { | ||
1266 | ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, | ||
1267 | root_objectid, owner, offset); | ||
1268 | } | ||
1269 | return ret; | ||
1270 | } | ||
1271 | |||
1272 | /* | ||
1273 | * helper to update or remove an inline back ref | ||
1274 | */ | ||
1275 | static noinline_for_stack | ||
1276 | int update_inline_extent_backref(struct btrfs_trans_handle *trans, | ||
1277 | struct btrfs_root *root, | ||
1278 | struct btrfs_path *path, | ||
1279 | struct btrfs_extent_inline_ref *iref, | ||
1280 | int refs_to_mod, | ||
1281 | struct btrfs_delayed_extent_op *extent_op) | ||
1282 | { | ||
1283 | struct extent_buffer *leaf; | ||
1284 | struct btrfs_extent_item *ei; | ||
1285 | struct btrfs_extent_data_ref *dref = NULL; | ||
1286 | struct btrfs_shared_data_ref *sref = NULL; | ||
1287 | unsigned long ptr; | ||
1288 | unsigned long end; | ||
1289 | u32 item_size; | ||
1290 | int size; | ||
1291 | int type; | ||
1292 | int ret; | ||
1293 | u64 refs; | ||
1294 | |||
1295 | leaf = path->nodes[0]; | ||
1296 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
1297 | refs = btrfs_extent_refs(leaf, ei); | ||
1298 | WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); | ||
1299 | refs += refs_to_mod; | ||
1300 | btrfs_set_extent_refs(leaf, ei, refs); | ||
1301 | if (extent_op) | ||
1302 | __run_delayed_extent_op(extent_op, leaf, ei); | ||
1303 | |||
1304 | type = btrfs_extent_inline_ref_type(leaf, iref); | ||
1305 | |||
1306 | if (type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
1307 | dref = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
1308 | refs = btrfs_extent_data_ref_count(leaf, dref); | ||
1309 | } else if (type == BTRFS_SHARED_DATA_REF_KEY) { | ||
1310 | sref = (struct btrfs_shared_data_ref *)(iref + 1); | ||
1311 | refs = btrfs_shared_data_ref_count(leaf, sref); | ||
1312 | } else { | ||
1313 | refs = 1; | ||
1314 | BUG_ON(refs_to_mod != -1); | ||
1315 | } | ||
1316 | |||
1317 | BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); | ||
1318 | refs += refs_to_mod; | ||
1319 | |||
1320 | if (refs > 0) { | ||
1321 | if (type == BTRFS_EXTENT_DATA_REF_KEY) | ||
1322 | btrfs_set_extent_data_ref_count(leaf, dref, refs); | ||
1323 | else | ||
1324 | btrfs_set_shared_data_ref_count(leaf, sref, refs); | ||
1325 | } else { | ||
1326 | size = btrfs_extent_inline_ref_size(type); | ||
1327 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
1328 | ptr = (unsigned long)iref; | ||
1329 | end = (unsigned long)ei + item_size; | ||
1330 | if (ptr + size < end) | ||
1331 | memmove_extent_buffer(leaf, ptr, ptr + size, | ||
1332 | end - ptr - size); | ||
1333 | item_size -= size; | ||
1334 | ret = btrfs_truncate_item(trans, root, path, item_size, 1); | ||
1335 | BUG_ON(ret); | ||
1336 | } | ||
1337 | btrfs_mark_buffer_dirty(leaf); | ||
1338 | return 0; | ||
1339 | } | ||
1340 | |||
1341 | static noinline_for_stack | ||
1342 | int insert_inline_extent_backref(struct btrfs_trans_handle *trans, | ||
1343 | struct btrfs_root *root, | ||
1344 | struct btrfs_path *path, | ||
1345 | u64 bytenr, u64 num_bytes, u64 parent, | ||
1346 | u64 root_objectid, u64 owner, | ||
1347 | u64 offset, int refs_to_add, | ||
1348 | struct btrfs_delayed_extent_op *extent_op) | ||
1349 | { | ||
1350 | struct btrfs_extent_inline_ref *iref; | ||
1351 | int ret; | ||
1352 | |||
1353 | ret = lookup_inline_extent_backref(trans, root, path, &iref, | ||
1354 | bytenr, num_bytes, parent, | ||
1355 | root_objectid, owner, offset, 1); | ||
1356 | if (ret == 0) { | ||
1357 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); | ||
1358 | ret = update_inline_extent_backref(trans, root, path, iref, | ||
1359 | refs_to_add, extent_op); | ||
1360 | } else if (ret == -ENOENT) { | ||
1361 | ret = setup_inline_extent_backref(trans, root, path, iref, | ||
1362 | parent, root_objectid, | ||
1363 | owner, offset, refs_to_add, | ||
1364 | extent_op); | ||
1365 | } | ||
1366 | return ret; | ||
1367 | } | ||
1368 | |||
1369 | static int insert_extent_backref(struct btrfs_trans_handle *trans, | ||
1370 | struct btrfs_root *root, | ||
1371 | struct btrfs_path *path, | ||
1372 | u64 bytenr, u64 parent, u64 root_objectid, | ||
1373 | u64 owner, u64 offset, int refs_to_add) | ||
1374 | { | ||
1375 | int ret; | ||
1376 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
1377 | BUG_ON(refs_to_add != 1); | ||
1378 | ret = insert_tree_block_ref(trans, root, path, bytenr, | ||
1379 | parent, root_objectid); | ||
1380 | } else { | ||
1381 | ret = insert_extent_data_ref(trans, root, path, bytenr, | ||
1382 | parent, root_objectid, | ||
1383 | owner, offset, refs_to_add); | ||
1384 | } | ||
1385 | return ret; | ||
1386 | } | ||
1387 | |||
1388 | static int remove_extent_backref(struct btrfs_trans_handle *trans, | ||
1389 | struct btrfs_root *root, | ||
1390 | struct btrfs_path *path, | ||
1391 | struct btrfs_extent_inline_ref *iref, | ||
1392 | int refs_to_drop, int is_data) | ||
1393 | { | ||
1394 | int ret; | ||
1395 | |||
1396 | BUG_ON(!is_data && refs_to_drop != 1); | ||
1397 | if (iref) { | ||
1398 | ret = update_inline_extent_backref(trans, root, path, iref, | ||
1399 | -refs_to_drop, NULL); | ||
1400 | } else if (is_data) { | ||
1401 | ret = remove_extent_data_ref(trans, root, path, refs_to_drop); | ||
1402 | } else { | ||
1403 | ret = btrfs_del_item(trans, root, path); | ||
1404 | } | ||
1405 | return ret; | ||
1406 | } | ||
1407 | |||
649 | #ifdef BIO_RW_DISCARD | 1408 | #ifdef BIO_RW_DISCARD |
650 | static void btrfs_issue_discard(struct block_device *bdev, | 1409 | static void btrfs_issue_discard(struct block_device *bdev, |
651 | u64 start, u64 len) | 1410 | u64 start, u64 len) |
@@ -686,71 +1445,40 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
686 | #endif | 1445 | #endif |
687 | } | 1446 | } |
688 | 1447 | ||
689 | static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 1448 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
690 | struct btrfs_root *root, u64 bytenr, | 1449 | struct btrfs_root *root, |
691 | u64 num_bytes, | 1450 | u64 bytenr, u64 num_bytes, u64 parent, |
692 | u64 orig_parent, u64 parent, | 1451 | u64 root_objectid, u64 owner, u64 offset) |
693 | u64 orig_root, u64 ref_root, | ||
694 | u64 orig_generation, u64 ref_generation, | ||
695 | u64 owner_objectid) | ||
696 | { | 1452 | { |
697 | int ret; | 1453 | int ret; |
698 | int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; | 1454 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && |
1455 | root_objectid == BTRFS_TREE_LOG_OBJECTID); | ||
699 | 1456 | ||
700 | ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, | 1457 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
701 | orig_parent, parent, orig_root, | 1458 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, |
702 | ref_root, orig_generation, | 1459 | parent, root_objectid, (int)owner, |
703 | ref_generation, owner_objectid, pin); | 1460 | BTRFS_ADD_DELAYED_REF, NULL); |
704 | BUG_ON(ret); | 1461 | } else { |
1462 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | ||
1463 | parent, root_objectid, owner, offset, | ||
1464 | BTRFS_ADD_DELAYED_REF, NULL); | ||
1465 | } | ||
705 | return ret; | 1466 | return ret; |
706 | } | 1467 | } |
707 | 1468 | ||
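
From this point in the series, ref count changes are funneled through the delayed ref machinery: btrfs_inc_extent_ref() above merely records an addition (tree or data flavour), and the extent tree is only touched later when the delayed refs are run. Conceptually, the actions queued against one extent collapse into a signed modifier (a toy model only; the real bookkeeping lives in delayed-ref.c):

    #include <stdint.h>

    /* toy model of delayed ref accounting, not btrfs code */
    struct toy_delayed_head {
            uint64_t bytenr;
            int ref_mod;            /* net effect of all queued actions */
    };

    static void toy_record(struct toy_delayed_head *head, int add)
    {
            head->ref_mod += add ? 1 : -1;  /* ADD vs DROP delayed ref */
    }

When a head is finally processed, run_one_delayed_ref() below applies the accumulated ref_mod in a single extent tree update.
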
708 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | ||
709 | struct btrfs_root *root, u64 bytenr, | ||
710 | u64 num_bytes, u64 orig_parent, u64 parent, | ||
711 | u64 ref_root, u64 ref_generation, | ||
712 | u64 owner_objectid) | ||
713 | { | ||
714 | int ret; | ||
715 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | ||
716 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
717 | return 0; | ||
718 | |||
719 | ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes, | ||
720 | orig_parent, parent, ref_root, | ||
721 | ref_root, ref_generation, | ||
722 | ref_generation, owner_objectid); | ||
723 | return ret; | ||
724 | } | ||
725 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1469 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
726 | struct btrfs_root *root, u64 bytenr, | 1470 | struct btrfs_root *root, |
727 | u64 num_bytes, | 1471 | u64 bytenr, u64 num_bytes, |
728 | u64 orig_parent, u64 parent, | 1472 | u64 parent, u64 root_objectid, |
729 | u64 orig_root, u64 ref_root, | 1473 | u64 owner, u64 offset, int refs_to_add, |
730 | u64 orig_generation, u64 ref_generation, | 1474 | struct btrfs_delayed_extent_op *extent_op) |
731 | u64 owner_objectid) | ||
732 | { | ||
733 | int ret; | ||
734 | |||
735 | ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root, | ||
736 | ref_generation, owner_objectid, | ||
737 | BTRFS_ADD_DELAYED_REF, 0); | ||
738 | BUG_ON(ret); | ||
739 | return ret; | ||
740 | } | ||
741 | |||
742 | static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans, | ||
743 | struct btrfs_root *root, u64 bytenr, | ||
744 | u64 num_bytes, u64 parent, u64 ref_root, | ||
745 | u64 ref_generation, u64 owner_objectid, | ||
746 | int refs_to_add) | ||
747 | { | 1475 | { |
748 | struct btrfs_path *path; | 1476 | struct btrfs_path *path; |
749 | int ret; | 1477 | struct extent_buffer *leaf; |
750 | struct btrfs_key key; | ||
751 | struct extent_buffer *l; | ||
752 | struct btrfs_extent_item *item; | 1478 | struct btrfs_extent_item *item; |
753 | u32 refs; | 1479 | u64 refs; |
1480 | int ret; | ||
1481 | int err = 0; | ||
754 | 1482 | ||
755 | path = btrfs_alloc_path(); | 1483 | path = btrfs_alloc_path(); |
756 | if (!path) | 1484 | if (!path) |
@@ -758,43 +1486,27 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans, | |||
758 | 1486 | ||
759 | path->reada = 1; | 1487 | path->reada = 1; |
760 | path->leave_spinning = 1; | 1488 | path->leave_spinning = 1; |
761 | key.objectid = bytenr; | 1489 | /* this will set up the path even if it fails to insert the back ref */ |
762 | key.type = BTRFS_EXTENT_ITEM_KEY; | 1490 | ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, |
763 | key.offset = num_bytes; | 1491 | path, bytenr, num_bytes, parent, |
764 | 1492 | root_objectid, owner, offset, | |
765 | /* first find the extent item and update its reference count */ | 1493 | refs_to_add, extent_op); |
766 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, | 1494 | if (ret == 0) |
767 | path, 0, 1); | 1495 | goto out; |
768 | if (ret < 0) { | ||
769 | btrfs_set_path_blocking(path); | ||
770 | return ret; | ||
771 | } | ||
772 | |||
773 | if (ret > 0) { | ||
774 | WARN_ON(1); | ||
775 | btrfs_free_path(path); | ||
776 | return -EIO; | ||
777 | } | ||
778 | l = path->nodes[0]; | ||
779 | 1496 | ||
780 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 1497 | if (ret != -EAGAIN) { |
781 | if (key.objectid != bytenr) { | 1498 | err = ret; |
782 | btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); | 1499 | goto out; |
783 | printk(KERN_ERR "btrfs wanted %llu found %llu\n", | ||
784 | (unsigned long long)bytenr, | ||
785 | (unsigned long long)key.objectid); | ||
786 | BUG(); | ||
787 | } | 1500 | } |
788 | BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); | ||
789 | |||
790 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | ||
791 | |||
792 | refs = btrfs_extent_refs(l, item); | ||
793 | btrfs_set_extent_refs(l, item, refs + refs_to_add); | ||
794 | btrfs_unlock_up_safe(path, 1); | ||
795 | 1501 | ||
796 | btrfs_mark_buffer_dirty(path->nodes[0]); | 1502 | leaf = path->nodes[0]; |
1503 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
1504 | refs = btrfs_extent_refs(leaf, item); | ||
1505 | btrfs_set_extent_refs(leaf, item, refs + refs_to_add); | ||
1506 | if (extent_op) | ||
1507 | __run_delayed_extent_op(extent_op, leaf, item); | ||
797 | 1508 | ||
1509 | btrfs_mark_buffer_dirty(leaf); | ||
798 | btrfs_release_path(root->fs_info->extent_root, path); | 1510 | btrfs_release_path(root->fs_info->extent_root, path); |
799 | 1511 | ||
800 | path->reada = 1; | 1512 | path->reada = 1; |
@@ -802,56 +1514,197 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans, | |||
802 | 1514 | ||
803 | /* now insert the actual backref */ | 1515 | /* now insert the actual backref */ |
804 | ret = insert_extent_backref(trans, root->fs_info->extent_root, | 1516 | ret = insert_extent_backref(trans, root->fs_info->extent_root, |
805 | path, bytenr, parent, | 1517 | path, bytenr, parent, root_objectid, |
806 | ref_root, ref_generation, | 1518 | owner, offset, refs_to_add); |
807 | owner_objectid, refs_to_add); | ||
808 | BUG_ON(ret); | 1519 | BUG_ON(ret); |
1520 | out: | ||
809 | btrfs_free_path(path); | 1521 | btrfs_free_path(path); |
810 | return 0; | 1522 | return err; |
811 | } | 1523 | } |
812 | 1524 | ||
813 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1525 | static int run_delayed_data_ref(struct btrfs_trans_handle *trans, |
814 | struct btrfs_root *root, | 1526 | struct btrfs_root *root, |
815 | u64 bytenr, u64 num_bytes, u64 parent, | 1527 | struct btrfs_delayed_ref_node *node, |
816 | u64 ref_root, u64 ref_generation, | 1528 | struct btrfs_delayed_extent_op *extent_op, |
817 | u64 owner_objectid) | 1529 | int insert_reserved) |
818 | { | 1530 | { |
819 | int ret; | 1531 | int ret = 0; |
820 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | 1532 | struct btrfs_delayed_data_ref *ref; |
821 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 1533 | struct btrfs_key ins; |
822 | return 0; | 1534 | u64 parent = 0; |
1535 | u64 ref_root = 0; | ||
1536 | u64 flags = 0; | ||
823 | 1537 | ||
824 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent, | 1538 | ins.objectid = node->bytenr; |
825 | 0, ref_root, 0, ref_generation, | 1539 | ins.offset = node->num_bytes; |
826 | owner_objectid); | 1540 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
1541 | |||
1542 | ref = btrfs_delayed_node_to_data_ref(node); | ||
1543 | if (node->type == BTRFS_SHARED_DATA_REF_KEY) | ||
1544 | parent = ref->parent; | ||
1545 | else | ||
1546 | ref_root = ref->root; | ||
1547 | |||
1548 | if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { | ||
1549 | if (extent_op) { | ||
1550 | BUG_ON(extent_op->update_key); | ||
1551 | flags |= extent_op->flags_to_set; | ||
1552 | } | ||
1553 | ret = alloc_reserved_file_extent(trans, root, | ||
1554 | parent, ref_root, flags, | ||
1555 | ref->objectid, ref->offset, | ||
1556 | &ins, node->ref_mod); | ||
1557 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
1558 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | ||
1559 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | ||
1560 | node->num_bytes, parent, | ||
1561 | ref_root, ref->objectid, | ||
1562 | ref->offset, node->ref_mod, | ||
1563 | extent_op); | ||
1564 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { | ||
1565 | ret = __btrfs_free_extent(trans, root, node->bytenr, | ||
1566 | node->num_bytes, parent, | ||
1567 | ref_root, ref->objectid, | ||
1568 | ref->offset, node->ref_mod, | ||
1569 | extent_op); | ||
1570 | } else { | ||
1571 | BUG(); | ||
1572 | } | ||
827 | return ret; | 1573 | return ret; |
828 | } | 1574 | } |
829 | 1575 | ||
830 | static int drop_delayed_ref(struct btrfs_trans_handle *trans, | 1576 | static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, |
831 | struct btrfs_root *root, | 1577 | struct extent_buffer *leaf, |
832 | struct btrfs_delayed_ref_node *node) | 1578 | struct btrfs_extent_item *ei) |
1579 | { | ||
1580 | u64 flags = btrfs_extent_flags(leaf, ei); | ||
1581 | if (extent_op->update_flags) { | ||
1582 | flags |= extent_op->flags_to_set; | ||
1583 | btrfs_set_extent_flags(leaf, ei, flags); | ||
1584 | } | ||
1585 | |||
1586 | if (extent_op->update_key) { | ||
1587 | struct btrfs_tree_block_info *bi; | ||
1588 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
1589 | bi = (struct btrfs_tree_block_info *)(ei + 1); | ||
1590 | btrfs_set_tree_block_key(leaf, bi, &extent_op->key); | ||
1591 | } | ||
1592 | } | ||
1593 | |||
1594 | static int run_delayed_extent_op(struct btrfs_trans_handle *trans, | ||
1595 | struct btrfs_root *root, | ||
1596 | struct btrfs_delayed_ref_node *node, | ||
1597 | struct btrfs_delayed_extent_op *extent_op) | ||
1598 | { | ||
1599 | struct btrfs_key key; | ||
1600 | struct btrfs_path *path; | ||
1601 | struct btrfs_extent_item *ei; | ||
1602 | struct extent_buffer *leaf; | ||
1603 | u32 item_size; | ||
1604 | int ret; | ||
1605 | int err = 0; | ||
1606 | |||
1607 | path = btrfs_alloc_path(); | ||
1608 | if (!path) | ||
1609 | return -ENOMEM; | ||
1610 | |||
1611 | key.objectid = node->bytenr; | ||
1612 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
1613 | key.offset = node->num_bytes; | ||
1614 | |||
1615 | path->reada = 1; | ||
1616 | path->leave_spinning = 1; | ||
1617 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, | ||
1618 | path, 0, 1); | ||
1619 | if (ret < 0) { | ||
1620 | err = ret; | ||
1621 | goto out; | ||
1622 | } | ||
1623 | if (ret > 0) { | ||
1624 | err = -EIO; | ||
1625 | goto out; | ||
1626 | } | ||
1627 | |||
1628 | leaf = path->nodes[0]; | ||
1629 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
1630 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
1631 | if (item_size < sizeof(*ei)) { | ||
1632 | ret = convert_extent_item_v0(trans, root->fs_info->extent_root, | ||
1633 | path, (u64)-1, 0); | ||
1634 | if (ret < 0) { | ||
1635 | err = ret; | ||
1636 | goto out; | ||
1637 | } | ||
1638 | leaf = path->nodes[0]; | ||
1639 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
1640 | } | ||
1641 | #endif | ||
1642 | BUG_ON(item_size < sizeof(*ei)); | ||
1643 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
1644 | __run_delayed_extent_op(extent_op, leaf, ei); | ||
1645 | |||
1646 | btrfs_mark_buffer_dirty(leaf); | ||
1647 | out: | ||
1648 | btrfs_free_path(path); | ||
1649 | return err; | ||
1650 | } | ||
1651 | |||
1652 | static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | ||
1653 | struct btrfs_root *root, | ||
1654 | struct btrfs_delayed_ref_node *node, | ||
1655 | struct btrfs_delayed_extent_op *extent_op, | ||
1656 | int insert_reserved) | ||
833 | { | 1657 | { |
834 | int ret = 0; | 1658 | int ret = 0; |
835 | struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); | 1659 | struct btrfs_delayed_tree_ref *ref; |
1660 | struct btrfs_key ins; | ||
1661 | u64 parent = 0; | ||
1662 | u64 ref_root = 0; | ||
836 | 1663 | ||
837 | BUG_ON(node->ref_mod == 0); | 1664 | ins.objectid = node->bytenr; |
838 | ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, | 1665 | ins.offset = node->num_bytes; |
839 | node->parent, ref->root, ref->generation, | 1666 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
840 | ref->owner_objectid, ref->pin, node->ref_mod); | ||
841 | 1667 | ||
1668 | ref = btrfs_delayed_node_to_tree_ref(node); | ||
1669 | if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) | ||
1670 | parent = ref->parent; | ||
1671 | else | ||
1672 | ref_root = ref->root; | ||
1673 | |||
1674 | BUG_ON(node->ref_mod != 1); | ||
1675 | if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { | ||
1676 | BUG_ON(!extent_op || !extent_op->update_flags || | ||
1677 | !extent_op->update_key); | ||
1678 | ret = alloc_reserved_tree_block(trans, root, | ||
1679 | parent, ref_root, | ||
1680 | extent_op->flags_to_set, | ||
1681 | &extent_op->key, | ||
1682 | ref->level, &ins); | ||
1683 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
1684 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | ||
1685 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | ||
1686 | node->num_bytes, parent, ref_root, | ||
1687 | ref->level, 0, 1, extent_op); | ||
1688 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { | ||
1689 | ret = __btrfs_free_extent(trans, root, node->bytenr, | ||
1690 | node->num_bytes, parent, ref_root, | ||
1691 | ref->level, 0, 1, extent_op); | ||
1692 | } else { | ||
1693 | BUG(); | ||
1694 | } | ||
842 | return ret; | 1695 | return ret; |
843 | } | 1696 | } |
844 | 1697 | ||
1698 | |||
845 | /* helper function to actually process a single delayed ref entry */ | 1699 | /* helper function to actually process a single delayed ref entry */ |
846 | static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, | 1700 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, |
847 | struct btrfs_root *root, | 1701 | struct btrfs_root *root, |
848 | struct btrfs_delayed_ref_node *node, | 1702 | struct btrfs_delayed_ref_node *node, |
849 | int insert_reserved) | 1703 | struct btrfs_delayed_extent_op *extent_op, |
1704 | int insert_reserved) | ||
850 | { | 1705 | { |
851 | int ret; | 1706 | int ret; |
852 | struct btrfs_delayed_ref *ref; | 1707 | if (btrfs_delayed_ref_is_head(node)) { |
853 | |||
854 | if (node->parent == (u64)-1) { | ||
855 | struct btrfs_delayed_ref_head *head; | 1708 | struct btrfs_delayed_ref_head *head; |
856 | /* | 1709 | /* |
857 | * we've hit the end of the chain and we were supposed | 1710 | * we've hit the end of the chain and we were supposed |
@@ -859,44 +1712,35 @@ static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
859 | * deleted before we ever needed to insert it, so all | 1712 | * deleted before we ever needed to insert it, so all |
860 | * we have to do is clean up the accounting | 1713 | * we have to do is clean up the accounting |
861 | */ | 1714 | */ |
1715 | BUG_ON(extent_op); | ||
1716 | head = btrfs_delayed_node_to_head(node); | ||
862 | if (insert_reserved) { | 1717 | if (insert_reserved) { |
1718 | if (head->is_data) { | ||
1719 | ret = btrfs_del_csums(trans, root, | ||
1720 | node->bytenr, | ||
1721 | node->num_bytes); | ||
1722 | BUG_ON(ret); | ||
1723 | } | ||
1724 | btrfs_update_pinned_extents(root, node->bytenr, | ||
1725 | node->num_bytes, 1); | ||
863 | update_reserved_extents(root, node->bytenr, | 1726 | update_reserved_extents(root, node->bytenr, |
864 | node->num_bytes, 0); | 1727 | node->num_bytes, 0); |
865 | } | 1728 | } |
866 | head = btrfs_delayed_node_to_head(node); | ||
867 | mutex_unlock(&head->mutex); | 1729 | mutex_unlock(&head->mutex); |
868 | return 0; | 1730 | return 0; |
869 | } | 1731 | } |
870 | 1732 | ||
871 | ref = btrfs_delayed_node_to_ref(node); | 1733 | if (node->type == BTRFS_TREE_BLOCK_REF_KEY || |
872 | if (ref->action == BTRFS_ADD_DELAYED_REF) { | 1734 | node->type == BTRFS_SHARED_BLOCK_REF_KEY) |
873 | if (insert_reserved) { | 1735 | ret = run_delayed_tree_ref(trans, root, node, extent_op, |
874 | struct btrfs_key ins; | 1736 | insert_reserved); |
875 | 1737 | else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || | |
876 | ins.objectid = node->bytenr; | 1738 | node->type == BTRFS_SHARED_DATA_REF_KEY) |
877 | ins.offset = node->num_bytes; | 1739 | ret = run_delayed_data_ref(trans, root, node, extent_op, |
878 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 1740 | insert_reserved); |
879 | 1741 | else | |
880 | /* record the full extent allocation */ | 1742 | BUG(); |
881 | ret = __btrfs_alloc_reserved_extent(trans, root, | 1743 | return ret; |
882 | node->parent, ref->root, | ||
883 | ref->generation, ref->owner_objectid, | ||
884 | &ins, node->ref_mod); | ||
885 | update_reserved_extents(root, node->bytenr, | ||
886 | node->num_bytes, 0); | ||
887 | } else { | ||
888 | /* just add one backref */ | ||
889 | ret = add_extent_ref(trans, root, node->bytenr, | ||
890 | node->num_bytes, | ||
891 | node->parent, ref->root, ref->generation, | ||
892 | ref->owner_objectid, node->ref_mod); | ||
893 | } | ||
894 | BUG_ON(ret); | ||
895 | } else if (ref->action == BTRFS_DROP_DELAYED_REF) { | ||
896 | WARN_ON(insert_reserved); | ||
897 | ret = drop_delayed_ref(trans, root, node); | ||
898 | } | ||
899 | return 0; | ||
900 | } | 1744 | } |
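The rewritten run_one_delayed_ref() treats head nodes specially: per the comment above, a head that still has must_insert_reserved set means the extent was reserved but every ref to it died before the item was ever inserted, so only the accounting is unwound (csums deleted and bytes pinned for data, then the reservation released). Ordinary nodes are routed purely by type; a toy router mirroring that switch (constants are stand-ins, not the on-disk key values):

enum ref_type { TREE_BLOCK_REF, SHARED_BLOCK_REF,
                EXTENT_DATA_REF, SHARED_DATA_REF };

const char *route(enum ref_type t)
{
        switch (t) {
        case TREE_BLOCK_REF:
        case SHARED_BLOCK_REF: return "run_delayed_tree_ref";
        case EXTENT_DATA_REF:
        case SHARED_DATA_REF:  return "run_delayed_data_ref";
        }
        return "BUG()";   /* any other type is a corrupted ref */
}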
901 | 1745 | ||
902 | static noinline struct btrfs_delayed_ref_node * | 1746 | static noinline struct btrfs_delayed_ref_node * |
@@ -919,7 +1763,7 @@ again: | |||
919 | rb_node); | 1763 | rb_node); |
920 | if (ref->bytenr != head->node.bytenr) | 1764 | if (ref->bytenr != head->node.bytenr) |
921 | break; | 1765 | break; |
922 | if (btrfs_delayed_node_to_ref(ref)->action == action) | 1766 | if (ref->action == action) |
923 | return ref; | 1767 | return ref; |
924 | node = rb_prev(node); | 1768 | node = rb_prev(node); |
925 | } | 1769 | } |
@@ -937,6 +1781,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
937 | struct btrfs_delayed_ref_root *delayed_refs; | 1781 | struct btrfs_delayed_ref_root *delayed_refs; |
938 | struct btrfs_delayed_ref_node *ref; | 1782 | struct btrfs_delayed_ref_node *ref; |
939 | struct btrfs_delayed_ref_head *locked_ref = NULL; | 1783 | struct btrfs_delayed_ref_head *locked_ref = NULL; |
1784 | struct btrfs_delayed_extent_op *extent_op; | ||
940 | int ret; | 1785 | int ret; |
941 | int count = 0; | 1786 | int count = 0; |
942 | int must_insert_reserved = 0; | 1787 | int must_insert_reserved = 0; |
@@ -975,6 +1820,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
975 | must_insert_reserved = locked_ref->must_insert_reserved; | 1820 | must_insert_reserved = locked_ref->must_insert_reserved; |
976 | locked_ref->must_insert_reserved = 0; | 1821 | locked_ref->must_insert_reserved = 0; |
977 | 1822 | ||
1823 | extent_op = locked_ref->extent_op; | ||
1824 | locked_ref->extent_op = NULL; | ||
1825 | |||
978 | /* | 1826 | /* |
979 | * locked_ref is the head node, so we have to go one | 1827 | * locked_ref is the head node, so we have to go one |
980 | * node back for any delayed ref updates | 1828 | * node back for any delayed ref updates |
@@ -986,6 +1834,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
986 | * so that any accounting fixes can happen | 1834 | * so that any accounting fixes can happen |
987 | */ | 1835 | */ |
988 | ref = &locked_ref->node; | 1836 | ref = &locked_ref->node; |
1837 | |||
1838 | if (extent_op && must_insert_reserved) { | ||
1839 | kfree(extent_op); | ||
1840 | extent_op = NULL; | ||
1841 | } | ||
1842 | |||
1843 | if (extent_op) { | ||
1844 | spin_unlock(&delayed_refs->lock); | ||
1845 | |||
1846 | ret = run_delayed_extent_op(trans, root, | ||
1847 | ref, extent_op); | ||
1848 | BUG_ON(ret); | ||
1849 | kfree(extent_op); | ||
1850 | |||
1851 | cond_resched(); | ||
1852 | spin_lock(&delayed_refs->lock); | ||
1853 | continue; | ||
1854 | } | ||
1855 | |||
989 | list_del_init(&locked_ref->cluster); | 1856 | list_del_init(&locked_ref->cluster); |
990 | locked_ref = NULL; | 1857 | locked_ref = NULL; |
991 | } | 1858 | } |
@@ -993,14 +1860,17 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
993 | ref->in_tree = 0; | 1860 | ref->in_tree = 0; |
994 | rb_erase(&ref->rb_node, &delayed_refs->root); | 1861 | rb_erase(&ref->rb_node, &delayed_refs->root); |
995 | delayed_refs->num_entries--; | 1862 | delayed_refs->num_entries--; |
1863 | |||
996 | spin_unlock(&delayed_refs->lock); | 1864 | spin_unlock(&delayed_refs->lock); |
997 | 1865 | ||
998 | ret = run_one_delayed_ref(trans, root, ref, | 1866 | ret = run_one_delayed_ref(trans, root, ref, extent_op, |
999 | must_insert_reserved); | 1867 | must_insert_reserved); |
1000 | BUG_ON(ret); | 1868 | BUG_ON(ret); |
1001 | btrfs_put_delayed_ref(ref); | ||
1002 | 1869 | ||
1870 | btrfs_put_delayed_ref(ref); | ||
1871 | kfree(extent_op); | ||
1003 | count++; | 1872 | count++; |
1873 | |||
1004 | cond_resched(); | 1874 | cond_resched(); |
1005 | spin_lock(&delayed_refs->lock); | 1875 | spin_lock(&delayed_refs->lock); |
1006 | } | 1876 | } |
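Two ordering rules fall out of the head handling in run_clustered_refs(), as I read it: a pending extent_op is freed unused when must_insert_reserved is set, because the flags and key will be written when the reserved item is inserted anyway; otherwise the op runs first, outside the spinlock, and the loop restarts so the head is re-examined before being retired. A standalone restatement of just that decision:

#include <stdio.h>
#include <stdlib.h>

struct head { void *extent_op; int must_insert_reserved; };

/* Returns 1 when the caller must loop and look at the head again
 * ('continue' in the kernel), 0 when the head may be retired. */
int settle_extent_op(struct head *h)
{
        if (h->extent_op && h->must_insert_reserved) {
                free(h->extent_op);   /* flags/key get written at insert time */
                h->extent_op = NULL;
        }
        if (h->extent_op) {
                puts("run_delayed_extent_op outside the spinlock");
                free(h->extent_op);
                h->extent_op = NULL;
                return 1;
        }
        return 0;
}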
@@ -1095,25 +1965,112 @@ out: | |||
1095 | return 0; | 1965 | return 0; |
1096 | } | 1966 | } |
1097 | 1967 | ||
1098 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1968 | int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, |
1099 | struct btrfs_root *root, u64 objectid, u64 bytenr) | 1969 | struct btrfs_root *root, |
1970 | u64 bytenr, u64 num_bytes, u64 flags, | ||
1971 | int is_data) | ||
1972 | { | ||
1973 | struct btrfs_delayed_extent_op *extent_op; | ||
1974 | int ret; | ||
1975 | |||
1976 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
1977 | if (!extent_op) | ||
1978 | return -ENOMEM; | ||
1979 | |||
1980 | extent_op->flags_to_set = flags; | ||
1981 | extent_op->update_flags = 1; | ||
1982 | extent_op->update_key = 0; | ||
1983 | extent_op->is_data = is_data ? 1 : 0; | ||
1984 | |||
1985 | ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); | ||
1986 | if (ret) | ||
1987 | kfree(extent_op); | ||
1988 | return ret; | ||
1989 | } | ||
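btrfs_set_disk_extent_flags() is the public face of the new extent_op mechanism: rather than rewriting the extent item in place, a caller queues a flags-only update (update_key stays 0) that the delayed-ref machinery applies later, and allocation failure is the only error path here as far as I can tell. A hypothetical call site, not taken from this patch:

        /* queue a flags-only update for a tree block instead of
         * editing its extent item directly */
        ret = btrfs_set_disk_extent_flags(trans, root, buf->start, buf->len,
                                          BTRFS_BLOCK_FLAG_FULL_BACKREF, 0);
        BUG_ON(ret);   /* queued; the delayed-ref code applies it later */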
1990 | |||
1991 | static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, | ||
1992 | struct btrfs_root *root, | ||
1993 | struct btrfs_path *path, | ||
1994 | u64 objectid, u64 offset, u64 bytenr) | ||
1995 | { | ||
1996 | struct btrfs_delayed_ref_head *head; | ||
1997 | struct btrfs_delayed_ref_node *ref; | ||
1998 | struct btrfs_delayed_data_ref *data_ref; | ||
1999 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2000 | struct rb_node *node; | ||
2001 | int ret = 0; | ||
2002 | |||
2003 | ret = -ENOENT; | ||
2004 | delayed_refs = &trans->transaction->delayed_refs; | ||
2005 | spin_lock(&delayed_refs->lock); | ||
2006 | head = btrfs_find_delayed_ref_head(trans, bytenr); | ||
2007 | if (!head) | ||
2008 | goto out; | ||
2009 | |||
2010 | if (!mutex_trylock(&head->mutex)) { | ||
2011 | atomic_inc(&head->node.refs); | ||
2012 | spin_unlock(&delayed_refs->lock); | ||
2013 | |||
2014 | btrfs_release_path(root->fs_info->extent_root, path); | ||
2015 | |||
2016 | mutex_lock(&head->mutex); | ||
2017 | mutex_unlock(&head->mutex); | ||
2018 | btrfs_put_delayed_ref(&head->node); | ||
2019 | return -EAGAIN; | ||
2020 | } | ||
2021 | |||
2022 | node = rb_prev(&head->node.rb_node); | ||
2023 | if (!node) | ||
2024 | goto out_unlock; | ||
2025 | |||
2026 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2027 | |||
2028 | if (ref->bytenr != bytenr) | ||
2029 | goto out_unlock; | ||
2030 | |||
2031 | ret = 1; | ||
2032 | if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) | ||
2033 | goto out_unlock; | ||
2034 | |||
2035 | data_ref = btrfs_delayed_node_to_data_ref(ref); | ||
2036 | |||
2037 | node = rb_prev(node); | ||
2038 | if (node) { | ||
2039 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2040 | if (ref->bytenr == bytenr) | ||
2041 | goto out_unlock; | ||
2042 | } | ||
2043 | |||
2044 | if (data_ref->root != root->root_key.objectid || | ||
2045 | data_ref->objectid != objectid || data_ref->offset != offset) | ||
2046 | goto out_unlock; | ||
2047 | |||
2048 | ret = 0; | ||
2049 | out_unlock: | ||
2050 | mutex_unlock(&head->mutex); | ||
2051 | out: | ||
2052 | spin_unlock(&delayed_refs->lock); | ||
2053 | return ret; | ||
2054 | } | ||
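The mutex_trylock() dance in check_delayed_ref() deserves a note: blocking on head->mutex while holding delayed_refs->lock would deadlock, so on contention the code pins the head with a refcount, drops its locks (and the btrfs path), waits for the mutex to cycle, and reports -EAGAIN so the caller restarts the whole lookup. The same idiom in a standalone pthread sketch (structure only; names are mine):

#include <errno.h>
#include <pthread.h>

struct head { pthread_spinlock_t *tree_lock; pthread_mutex_t mutex; int refs; };

/* Called with *tree_lock held. Returns 0 with head->mutex held,
 * or -EAGAIN after waiting out the current owner. */
int lock_head_or_retry(struct head *h)
{
        if (pthread_mutex_trylock(&h->mutex) == 0)
                return 0;                          /* got it: proceed */
        __atomic_add_fetch(&h->refs, 1, __ATOMIC_RELAXED);  /* pin the node */
        pthread_spin_unlock(h->tree_lock);    /* blocking here would deadlock */
        pthread_mutex_lock(&h->mutex);        /* wait for the owner to finish */
        pthread_mutex_unlock(&h->mutex);
        __atomic_sub_fetch(&h->refs, 1, __ATOMIC_RELAXED);  /* unpin */
        return -EAGAIN;                       /* caller restarts the lookup */
}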
2055 | |||
2056 | static noinline int check_committed_ref(struct btrfs_trans_handle *trans, | ||
2057 | struct btrfs_root *root, | ||
2058 | struct btrfs_path *path, | ||
2059 | u64 objectid, u64 offset, u64 bytenr) | ||
1100 | { | 2060 | { |
1101 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 2061 | struct btrfs_root *extent_root = root->fs_info->extent_root; |
1102 | struct btrfs_path *path; | ||
1103 | struct extent_buffer *leaf; | 2062 | struct extent_buffer *leaf; |
1104 | struct btrfs_extent_ref *ref_item; | 2063 | struct btrfs_extent_data_ref *ref; |
2064 | struct btrfs_extent_inline_ref *iref; | ||
2065 | struct btrfs_extent_item *ei; | ||
1105 | struct btrfs_key key; | 2066 | struct btrfs_key key; |
1106 | struct btrfs_key found_key; | 2067 | u32 item_size; |
1107 | u64 ref_root; | ||
1108 | u64 last_snapshot; | ||
1109 | u32 nritems; | ||
1110 | int ret; | 2068 | int ret; |
1111 | 2069 | ||
1112 | key.objectid = bytenr; | 2070 | key.objectid = bytenr; |
1113 | key.offset = (u64)-1; | 2071 | key.offset = (u64)-1; |
1114 | key.type = BTRFS_EXTENT_ITEM_KEY; | 2072 | key.type = BTRFS_EXTENT_ITEM_KEY; |
1115 | 2073 | ||
1116 | path = btrfs_alloc_path(); | ||
1117 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); | 2074 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
1118 | if (ret < 0) | 2075 | if (ret < 0) |
1119 | goto out; | 2076 | goto out; |
@@ -1125,55 +2082,83 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | |||
1125 | 2082 | ||
1126 | path->slots[0]--; | 2083 | path->slots[0]--; |
1127 | leaf = path->nodes[0]; | 2084 | leaf = path->nodes[0]; |
1128 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 2085 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
1129 | 2086 | ||
1130 | if (found_key.objectid != bytenr || | 2087 | if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) |
1131 | found_key.type != BTRFS_EXTENT_ITEM_KEY) | ||
1132 | goto out; | 2088 | goto out; |
1133 | 2089 | ||
1134 | last_snapshot = btrfs_root_last_snapshot(&root->root_item); | 2090 | ret = 1; |
1135 | while (1) { | 2091 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
1136 | leaf = path->nodes[0]; | 2092 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
1137 | nritems = btrfs_header_nritems(leaf); | 2093 | if (item_size < sizeof(*ei)) { |
1138 | if (path->slots[0] >= nritems) { | 2094 | WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); |
1139 | ret = btrfs_next_leaf(extent_root, path); | 2095 | goto out; |
1140 | if (ret < 0) | 2096 | } |
1141 | goto out; | 2097 | #endif |
1142 | if (ret == 0) | 2098 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
1143 | continue; | ||
1144 | break; | ||
1145 | } | ||
1146 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
1147 | if (found_key.objectid != bytenr) | ||
1148 | break; | ||
1149 | 2099 | ||
1150 | if (found_key.type != BTRFS_EXTENT_REF_KEY) { | 2100 | if (item_size != sizeof(*ei) + |
1151 | path->slots[0]++; | 2101 | btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) |
1152 | continue; | 2102 | goto out; |
1153 | } | ||
1154 | 2103 | ||
1155 | ref_item = btrfs_item_ptr(leaf, path->slots[0], | 2104 | if (btrfs_extent_generation(leaf, ei) <= |
1156 | struct btrfs_extent_ref); | 2105 | btrfs_root_last_snapshot(&root->root_item)) |
1157 | ref_root = btrfs_ref_root(leaf, ref_item); | 2106 | goto out; |
1158 | if ((ref_root != root->root_key.objectid && | 2107 | |
1159 | ref_root != BTRFS_TREE_LOG_OBJECTID) || | 2108 | iref = (struct btrfs_extent_inline_ref *)(ei + 1); |
1160 | objectid != btrfs_ref_objectid(leaf, ref_item)) { | 2109 | if (btrfs_extent_inline_ref_type(leaf, iref) != |
1161 | ret = 1; | 2110 | BTRFS_EXTENT_DATA_REF_KEY) |
1162 | goto out; | 2111 | goto out; |
1163 | } | 2112 | |
1164 | if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) { | 2113 | ref = (struct btrfs_extent_data_ref *)(&iref->offset); |
1165 | ret = 1; | 2114 | if (btrfs_extent_refs(leaf, ei) != |
2115 | btrfs_extent_data_ref_count(leaf, ref) || | ||
2116 | btrfs_extent_data_ref_root(leaf, ref) != | ||
2117 | root->root_key.objectid || | ||
2118 | btrfs_extent_data_ref_objectid(leaf, ref) != objectid || | ||
2119 | btrfs_extent_data_ref_offset(leaf, ref) != offset) | ||
2120 | goto out; | ||
2121 | |||
2122 | ret = 0; | ||
2123 | out: | ||
2124 | return ret; | ||
2125 | } | ||
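check_committed_ref() answers "is this data extent provably exclusive to us?" and errs toward 1 (possibly shared). Everything must hold at once for a 0: the item is new-format sized with exactly one inline ref, that ref is a keyed EXTENT_DATA_REF, the total refcount equals that ref's count, the (root, objectid, offset) triple matches the caller, and the extent's generation postdates the root's last snapshot. As a boolean, roughly:

#include <stdbool.h>

/* Userspace restatement of the exclusivity test; in the real code every
 * input is read out of the extent item and its single inline ref. */
bool definitely_not_shared(unsigned long long generation,
                           unsigned long long last_snapshot,
                           unsigned long long total_refs,
                           unsigned long long inline_ref_count,
                           bool root_matches, bool inode_matches,
                           bool offset_matches)
{
        return generation > last_snapshot &&
               total_refs == inline_ref_count &&
               root_matches && inode_matches && offset_matches;
}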
2126 | |||
2127 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | ||
2128 | struct btrfs_root *root, | ||
2129 | u64 objectid, u64 offset, u64 bytenr) | ||
2130 | { | ||
2131 | struct btrfs_path *path; | ||
2132 | int ret; | ||
2133 | int ret2; | ||
2134 | |||
2135 | path = btrfs_alloc_path(); | ||
2136 | if (!path) | ||
2137 | return -ENOENT; | ||
2138 | |||
2139 | do { | ||
2140 | ret = check_committed_ref(trans, root, path, objectid, | ||
2141 | offset, bytenr); | ||
2142 | if (ret && ret != -ENOENT) | ||
1166 | goto out; | 2143 | goto out; |
1167 | } | ||
1168 | 2144 | ||
1169 | path->slots[0]++; | 2145 | ret2 = check_delayed_ref(trans, root, path, objectid, |
2146 | offset, bytenr); | ||
2147 | } while (ret2 == -EAGAIN); | ||
2148 | |||
2149 | if (ret2 && ret2 != -ENOENT) { | ||
2150 | ret = ret2; | ||
2151 | goto out; | ||
1170 | } | 2152 | } |
1171 | ret = 0; | 2153 | |
2154 | if (ret != -ENOENT || ret2 != -ENOENT) | ||
2155 | ret = 0; | ||
1172 | out: | 2156 | out: |
1173 | btrfs_free_path(path); | 2157 | btrfs_free_path(path); |
1174 | return ret; | 2158 | return ret; |
1175 | } | 2159 | } |
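btrfs_cross_ref_exist() then glues the two checks together: the committed-tree check runs first and can bail out with 1 immediately, the delayed-ref check is retried for as long as it reports -EAGAIN, and -ENOENT from one side is harmless as long as the other side vouched for the extent. Only when both sides say -ENOENT does -ENOENT escape; since callers appear to treat any nonzero return as "may be shared", that stays conservative. A sketch of the final combination:

#include <errno.h>

/* How the two verdicts combine after errors, retries, and the early 1
 * return are handled (each input is 0 or -ENOENT by this point): */
int combine(int committed, int delayed)
{
        if (committed != -ENOENT || delayed != -ENOENT)
                return 0;        /* at least one side proved exclusivity */
        return -ENOENT;          /* unknown to both; still nonzero */
}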
1176 | 2160 | ||
2161 | #if 0 | ||
1177 | int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2162 | int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1178 | struct extent_buffer *buf, u32 nr_extents) | 2163 | struct extent_buffer *buf, u32 nr_extents) |
1179 | { | 2164 | { |
@@ -1291,62 +2276,44 @@ static int refsort_cmp(const void *a_void, const void *b_void) | |||
1291 | return 1; | 2276 | return 1; |
1292 | return 0; | 2277 | return 0; |
1293 | } | 2278 | } |
2279 | #endif | ||
1294 | 2280 | ||
1295 | 2281 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |
1296 | noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | ||
1297 | struct btrfs_root *root, | 2282 | struct btrfs_root *root, |
1298 | struct extent_buffer *orig_buf, | 2283 | struct extent_buffer *buf, |
1299 | struct extent_buffer *buf, u32 *nr_extents) | 2284 | int full_backref, int inc) |
1300 | { | 2285 | { |
1301 | u64 bytenr; | 2286 | u64 bytenr; |
2287 | u64 num_bytes; | ||
2288 | u64 parent; | ||
1302 | u64 ref_root; | 2289 | u64 ref_root; |
1303 | u64 orig_root; | ||
1304 | u64 ref_generation; | ||
1305 | u64 orig_generation; | ||
1306 | struct refsort *sorted; | ||
1307 | u32 nritems; | 2290 | u32 nritems; |
1308 | u32 nr_file_extents = 0; | ||
1309 | struct btrfs_key key; | 2291 | struct btrfs_key key; |
1310 | struct btrfs_file_extent_item *fi; | 2292 | struct btrfs_file_extent_item *fi; |
1311 | int i; | 2293 | int i; |
1312 | int level; | 2294 | int level; |
1313 | int ret = 0; | 2295 | int ret = 0; |
1314 | int faili = 0; | ||
1315 | int refi = 0; | ||
1316 | int slot; | ||
1317 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 2296 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
1318 | u64, u64, u64, u64, u64, u64, u64, u64, u64); | 2297 | u64, u64, u64, u64, u64, u64); |
1319 | 2298 | ||
1320 | ref_root = btrfs_header_owner(buf); | 2299 | ref_root = btrfs_header_owner(buf); |
1321 | ref_generation = btrfs_header_generation(buf); | ||
1322 | orig_root = btrfs_header_owner(orig_buf); | ||
1323 | orig_generation = btrfs_header_generation(orig_buf); | ||
1324 | |||
1325 | nritems = btrfs_header_nritems(buf); | 2300 | nritems = btrfs_header_nritems(buf); |
1326 | level = btrfs_header_level(buf); | 2301 | level = btrfs_header_level(buf); |
1327 | 2302 | ||
1328 | sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); | 2303 | if (!root->ref_cows && level == 0) |
1329 | BUG_ON(!sorted); | 2304 | return 0; |
1330 | 2305 | ||
1331 | if (root->ref_cows) { | 2306 | if (inc) |
1332 | process_func = __btrfs_inc_extent_ref; | 2307 | process_func = btrfs_inc_extent_ref; |
1333 | } else { | 2308 | else |
1334 | if (level == 0 && | 2309 | process_func = btrfs_free_extent; |
1335 | root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) | 2310 | |
1336 | goto out; | 2311 | if (full_backref) |
1337 | if (level != 0 && | 2312 | parent = buf->start; |
1338 | root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) | 2313 | else |
1339 | goto out; | 2314 | parent = 0; |
1340 | process_func = __btrfs_update_extent_ref; | ||
1341 | } | ||
1342 | 2315 | ||
1343 | /* | ||
1344 | * we make two passes through the items. In the first pass we | ||
1345 | * only record the byte number and slot. Then we sort based on | ||
1346 | * byte number and do the actual work based on the sorted results | ||
1347 | */ | ||
1348 | for (i = 0; i < nritems; i++) { | 2316 | for (i = 0; i < nritems; i++) { |
1349 | cond_resched(); | ||
1350 | if (level == 0) { | 2317 | if (level == 0) { |
1351 | btrfs_item_key_to_cpu(buf, &key, i); | 2318 | btrfs_item_key_to_cpu(buf, &key, i); |
1352 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | 2319 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) |
@@ -1360,151 +2327,38 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1360 | if (bytenr == 0) | 2327 | if (bytenr == 0) |
1361 | continue; | 2328 | continue; |
1362 | 2329 | ||
1363 | nr_file_extents++; | 2330 | num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); |
1364 | sorted[refi].bytenr = bytenr; | 2331 | key.offset -= btrfs_file_extent_offset(buf, fi); |
1365 | sorted[refi].slot = i; | 2332 | ret = process_func(trans, root, bytenr, num_bytes, |
1366 | refi++; | 2333 | parent, ref_root, key.objectid, |
1367 | } else { | 2334 | key.offset); |
1368 | bytenr = btrfs_node_blockptr(buf, i); | 2335 | if (ret) |
1369 | sorted[refi].bytenr = bytenr; | ||
1370 | sorted[refi].slot = i; | ||
1371 | refi++; | ||
1372 | } | ||
1373 | } | ||
1374 | /* | ||
1375 | * if refi == 0, we didn't actually put anything into the sorted | ||
1376 | * array and we're done | ||
1377 | */ | ||
1378 | if (refi == 0) | ||
1379 | goto out; | ||
1380 | |||
1381 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | ||
1382 | |||
1383 | for (i = 0; i < refi; i++) { | ||
1384 | cond_resched(); | ||
1385 | slot = sorted[i].slot; | ||
1386 | bytenr = sorted[i].bytenr; | ||
1387 | |||
1388 | if (level == 0) { | ||
1389 | btrfs_item_key_to_cpu(buf, &key, slot); | ||
1390 | fi = btrfs_item_ptr(buf, slot, | ||
1391 | struct btrfs_file_extent_item); | ||
1392 | |||
1393 | bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
1394 | if (bytenr == 0) | ||
1395 | continue; | ||
1396 | |||
1397 | ret = process_func(trans, root, bytenr, | ||
1398 | btrfs_file_extent_disk_num_bytes(buf, fi), | ||
1399 | orig_buf->start, buf->start, | ||
1400 | orig_root, ref_root, | ||
1401 | orig_generation, ref_generation, | ||
1402 | key.objectid); | ||
1403 | |||
1404 | if (ret) { | ||
1405 | faili = slot; | ||
1406 | WARN_ON(1); | ||
1407 | goto fail; | 2336 | goto fail; |
1408 | } | ||
1409 | } else { | 2337 | } else { |
1410 | ret = process_func(trans, root, bytenr, buf->len, | 2338 | bytenr = btrfs_node_blockptr(buf, i); |
1411 | orig_buf->start, buf->start, | 2339 | num_bytes = btrfs_level_size(root, level - 1); |
1412 | orig_root, ref_root, | 2340 | ret = process_func(trans, root, bytenr, num_bytes, |
1413 | orig_generation, ref_generation, | 2341 | parent, ref_root, level - 1, 0); |
1414 | level - 1); | 2342 | if (ret) |
1415 | if (ret) { | ||
1416 | faili = slot; | ||
1417 | WARN_ON(1); | ||
1418 | goto fail; | 2343 | goto fail; |
1419 | } | ||
1420 | } | 2344 | } |
1421 | } | 2345 | } |
1422 | out: | ||
1423 | kfree(sorted); | ||
1424 | if (nr_extents) { | ||
1425 | if (level == 0) | ||
1426 | *nr_extents = nr_file_extents; | ||
1427 | else | ||
1428 | *nr_extents = nritems; | ||
1429 | } | ||
1430 | return 0; | 2346 | return 0; |
1431 | fail: | 2347 | fail: |
1432 | kfree(sorted); | 2348 | BUG(); |
1433 | WARN_ON(1); | ||
1434 | return ret; | 2349 | return ret; |
1435 | } | 2350 | } |
1436 | 2351 | ||
1437 | int btrfs_update_ref(struct btrfs_trans_handle *trans, | 2352 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1438 | struct btrfs_root *root, struct extent_buffer *orig_buf, | 2353 | struct extent_buffer *buf, int full_backref) |
1439 | struct extent_buffer *buf, int start_slot, int nr) | ||
1440 | |||
1441 | { | 2354 | { |
1442 | u64 bytenr; | 2355 | return __btrfs_mod_ref(trans, root, buf, full_backref, 1); |
1443 | u64 ref_root; | 2356 | } |
1444 | u64 orig_root; | ||
1445 | u64 ref_generation; | ||
1446 | u64 orig_generation; | ||
1447 | struct btrfs_key key; | ||
1448 | struct btrfs_file_extent_item *fi; | ||
1449 | int i; | ||
1450 | int ret; | ||
1451 | int slot; | ||
1452 | int level; | ||
1453 | |||
1454 | BUG_ON(start_slot < 0); | ||
1455 | BUG_ON(start_slot + nr > btrfs_header_nritems(buf)); | ||
1456 | |||
1457 | ref_root = btrfs_header_owner(buf); | ||
1458 | ref_generation = btrfs_header_generation(buf); | ||
1459 | orig_root = btrfs_header_owner(orig_buf); | ||
1460 | orig_generation = btrfs_header_generation(orig_buf); | ||
1461 | level = btrfs_header_level(buf); | ||
1462 | |||
1463 | if (!root->ref_cows) { | ||
1464 | if (level == 0 && | ||
1465 | root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) | ||
1466 | return 0; | ||
1467 | if (level != 0 && | ||
1468 | root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) | ||
1469 | return 0; | ||
1470 | } | ||
1471 | 2357 | ||
1472 | for (i = 0, slot = start_slot; i < nr; i++, slot++) { | 2358 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1473 | cond_resched(); | 2359 | struct extent_buffer *buf, int full_backref) |
1474 | if (level == 0) { | 2360 | { |
1475 | btrfs_item_key_to_cpu(buf, &key, slot); | 2361 | return __btrfs_mod_ref(trans, root, buf, full_backref, 0); |
1476 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
1477 | continue; | ||
1478 | fi = btrfs_item_ptr(buf, slot, | ||
1479 | struct btrfs_file_extent_item); | ||
1480 | if (btrfs_file_extent_type(buf, fi) == | ||
1481 | BTRFS_FILE_EXTENT_INLINE) | ||
1482 | continue; | ||
1483 | bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
1484 | if (bytenr == 0) | ||
1485 | continue; | ||
1486 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | ||
1487 | btrfs_file_extent_disk_num_bytes(buf, fi), | ||
1488 | orig_buf->start, buf->start, | ||
1489 | orig_root, ref_root, orig_generation, | ||
1490 | ref_generation, key.objectid); | ||
1491 | if (ret) | ||
1492 | goto fail; | ||
1493 | } else { | ||
1494 | bytenr = btrfs_node_blockptr(buf, slot); | ||
1495 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | ||
1496 | buf->len, orig_buf->start, | ||
1497 | buf->start, orig_root, ref_root, | ||
1498 | orig_generation, ref_generation, | ||
1499 | level - 1); | ||
1500 | if (ret) | ||
1501 | goto fail; | ||
1502 | } | ||
1503 | } | ||
1504 | return 0; | ||
1505 | fail: | ||
1506 | WARN_ON(1); | ||
1507 | return -1; | ||
1508 | } | 2362 | } |
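__btrfs_mod_ref() is the single walker now behind btrfs_inc_ref() and btrfs_dec_ref(): one pass over the buffer, invoking btrfs_inc_extent_ref or btrfs_free_extent per file-extent disk pointer (leaves) or child-block pointer (nodes), with full_backref selecting parent = buf->start versus parent = 0. The one subtle line is key.offset -= btrfs_file_extent_offset(buf, fi): the backref records the file offset at which byte 0 of the disk extent would sit, so every reference into one disk extent collapses to the same (root, objectid, offset) identity. For example, a file mapping a disk extent from its 256 KiB mark at file offset 1 MiB records the backref at 1 MiB - 256 KiB = 768 KiB.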
1509 | 2363 | ||
1510 | static int write_one_cache_group(struct btrfs_trans_handle *trans, | 2364 | static int write_one_cache_group(struct btrfs_trans_handle *trans, |
@@ -2007,6 +2861,24 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
2007 | u64 old_val; | 2861 | u64 old_val; |
2008 | u64 byte_in_group; | 2862 | u64 byte_in_group; |
2009 | 2863 | ||
2864 | /* block accounting for super block */ | ||
2865 | spin_lock(&info->delalloc_lock); | ||
2866 | old_val = btrfs_super_bytes_used(&info->super_copy); | ||
2867 | if (alloc) | ||
2868 | old_val += num_bytes; | ||
2869 | else | ||
2870 | old_val -= num_bytes; | ||
2871 | btrfs_set_super_bytes_used(&info->super_copy, old_val); | ||
2872 | |||
2873 | /* block accounting for root item */ | ||
2874 | old_val = btrfs_root_used(&root->root_item); | ||
2875 | if (alloc) | ||
2876 | old_val += num_bytes; | ||
2877 | else | ||
2878 | old_val -= num_bytes; | ||
2879 | btrfs_set_root_used(&root->root_item, old_val); | ||
2880 | spin_unlock(&info->delalloc_lock); | ||
2881 | |||
2010 | while (total) { | 2882 | while (total) { |
2011 | cache = btrfs_lookup_block_group(info, bytenr); | 2883 | cache = btrfs_lookup_block_group(info, bytenr); |
2012 | if (!cache) | 2884 | if (!cache) |
@@ -2216,8 +3088,6 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
2216 | u64 header_owner = btrfs_header_owner(buf); | 3088 | u64 header_owner = btrfs_header_owner(buf); |
2217 | u64 header_transid = btrfs_header_generation(buf); | 3089 | u64 header_transid = btrfs_header_generation(buf); |
2218 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | 3090 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && |
2219 | header_owner != BTRFS_TREE_RELOC_OBJECTID && | ||
2220 | header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID && | ||
2221 | header_transid == trans->transid && | 3091 | header_transid == trans->transid && |
2222 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 3092 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
2223 | *must_clean = buf; | 3093 | *must_clean = buf; |
@@ -2235,63 +3105,77 @@ pinit: | |||
2235 | return 0; | 3105 | return 0; |
2236 | } | 3106 | } |
2237 | 3107 | ||
2238 | /* | 3108 | |
2239 | * remove an extent from the root, returns 0 on success | 3109 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
2240 | */ | 3110 | struct btrfs_root *root, |
2241 | static int __free_extent(struct btrfs_trans_handle *trans, | 3111 | u64 bytenr, u64 num_bytes, u64 parent, |
2242 | struct btrfs_root *root, | 3112 | u64 root_objectid, u64 owner_objectid, |
2243 | u64 bytenr, u64 num_bytes, u64 parent, | 3113 | u64 owner_offset, int refs_to_drop, |
2244 | u64 root_objectid, u64 ref_generation, | 3114 | struct btrfs_delayed_extent_op *extent_op) |
2245 | u64 owner_objectid, int pin, int mark_free, | ||
2246 | int refs_to_drop) | ||
2247 | { | 3115 | { |
2248 | struct btrfs_path *path; | ||
2249 | struct btrfs_key key; | 3116 | struct btrfs_key key; |
3117 | struct btrfs_path *path; | ||
2250 | struct btrfs_fs_info *info = root->fs_info; | 3118 | struct btrfs_fs_info *info = root->fs_info; |
2251 | struct btrfs_root *extent_root = info->extent_root; | 3119 | struct btrfs_root *extent_root = info->extent_root; |
2252 | struct extent_buffer *leaf; | 3120 | struct extent_buffer *leaf; |
3121 | struct btrfs_extent_item *ei; | ||
3122 | struct btrfs_extent_inline_ref *iref; | ||
2253 | int ret; | 3123 | int ret; |
3124 | int is_data; | ||
2254 | int extent_slot = 0; | 3125 | int extent_slot = 0; |
2255 | int found_extent = 0; | 3126 | int found_extent = 0; |
2256 | int num_to_del = 1; | 3127 | int num_to_del = 1; |
2257 | struct btrfs_extent_item *ei; | 3128 | u32 item_size; |
2258 | u32 refs; | 3129 | u64 refs; |
2259 | 3130 | ||
2260 | key.objectid = bytenr; | ||
2261 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | ||
2262 | key.offset = num_bytes; | ||
2263 | path = btrfs_alloc_path(); | 3131 | path = btrfs_alloc_path(); |
2264 | if (!path) | 3132 | if (!path) |
2265 | return -ENOMEM; | 3133 | return -ENOMEM; |
2266 | 3134 | ||
2267 | path->reada = 1; | 3135 | path->reada = 1; |
2268 | path->leave_spinning = 1; | 3136 | path->leave_spinning = 1; |
2269 | ret = lookup_extent_backref(trans, extent_root, path, | 3137 | |
2270 | bytenr, parent, root_objectid, | 3138 | is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; |
2271 | ref_generation, owner_objectid, 1); | 3139 | BUG_ON(!is_data && refs_to_drop != 1); |
3140 | |||
3141 | ret = lookup_extent_backref(trans, extent_root, path, &iref, | ||
3142 | bytenr, num_bytes, parent, | ||
3143 | root_objectid, owner_objectid, | ||
3144 | owner_offset); | ||
2272 | if (ret == 0) { | 3145 | if (ret == 0) { |
2273 | struct btrfs_key found_key; | ||
2274 | extent_slot = path->slots[0]; | 3146 | extent_slot = path->slots[0]; |
2275 | while (extent_slot > 0) { | 3147 | while (extent_slot >= 0) { |
2276 | extent_slot--; | 3148 | btrfs_item_key_to_cpu(path->nodes[0], &key, |
2277 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
2278 | extent_slot); | 3149 | extent_slot); |
2279 | if (found_key.objectid != bytenr) | 3150 | if (key.objectid != bytenr) |
2280 | break; | 3151 | break; |
2281 | if (found_key.type == BTRFS_EXTENT_ITEM_KEY && | 3152 | if (key.type == BTRFS_EXTENT_ITEM_KEY && |
2282 | found_key.offset == num_bytes) { | 3153 | key.offset == num_bytes) { |
2283 | found_extent = 1; | 3154 | found_extent = 1; |
2284 | break; | 3155 | break; |
2285 | } | 3156 | } |
2286 | if (path->slots[0] - extent_slot > 5) | 3157 | if (path->slots[0] - extent_slot > 5) |
2287 | break; | 3158 | break; |
3159 | extent_slot--; | ||
2288 | } | 3160 | } |
3161 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
3162 | item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); | ||
3163 | if (found_extent && item_size < sizeof(*ei)) | ||
3164 | found_extent = 0; | ||
3165 | #endif | ||
2289 | if (!found_extent) { | 3166 | if (!found_extent) { |
3167 | BUG_ON(iref); | ||
2290 | ret = remove_extent_backref(trans, extent_root, path, | 3168 | ret = remove_extent_backref(trans, extent_root, path, |
2291 | refs_to_drop); | 3169 | NULL, refs_to_drop, |
3170 | is_data); | ||
2292 | BUG_ON(ret); | 3171 | BUG_ON(ret); |
2293 | btrfs_release_path(extent_root, path); | 3172 | btrfs_release_path(extent_root, path); |
2294 | path->leave_spinning = 1; | 3173 | path->leave_spinning = 1; |
3174 | |||
3175 | key.objectid = bytenr; | ||
3176 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
3177 | key.offset = num_bytes; | ||
3178 | |||
2295 | ret = btrfs_search_slot(trans, extent_root, | 3179 | ret = btrfs_search_slot(trans, extent_root, |
2296 | &key, path, -1, 1); | 3180 | &key, path, -1, 1); |
2297 | if (ret) { | 3181 | if (ret) { |
@@ -2307,82 +3191,98 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2307 | btrfs_print_leaf(extent_root, path->nodes[0]); | 3191 | btrfs_print_leaf(extent_root, path->nodes[0]); |
2308 | WARN_ON(1); | 3192 | WARN_ON(1); |
2309 | printk(KERN_ERR "btrfs unable to find ref byte nr %llu " | 3193 | printk(KERN_ERR "btrfs unable to find ref byte nr %llu " |
2310 | "parent %llu root %llu gen %llu owner %llu\n", | 3194 | "parent %llu root %llu owner %llu offset %llu\n", |
2311 | (unsigned long long)bytenr, | 3195 | (unsigned long long)bytenr, |
2312 | (unsigned long long)parent, | 3196 | (unsigned long long)parent, |
2313 | (unsigned long long)root_objectid, | 3197 | (unsigned long long)root_objectid, |
2314 | (unsigned long long)ref_generation, | 3198 | (unsigned long long)owner_objectid, |
2315 | (unsigned long long)owner_objectid); | 3199 | (unsigned long long)owner_offset); |
2316 | } | 3200 | } |
2317 | 3201 | ||
2318 | leaf = path->nodes[0]; | 3202 | leaf = path->nodes[0]; |
3203 | item_size = btrfs_item_size_nr(leaf, extent_slot); | ||
3204 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
3205 | if (item_size < sizeof(*ei)) { | ||
3206 | BUG_ON(found_extent || extent_slot != path->slots[0]); | ||
3207 | ret = convert_extent_item_v0(trans, extent_root, path, | ||
3208 | owner_objectid, 0); | ||
3209 | BUG_ON(ret < 0); | ||
3210 | |||
3211 | btrfs_release_path(extent_root, path); | ||
3212 | path->leave_spinning = 1; | ||
3213 | |||
3214 | key.objectid = bytenr; | ||
3215 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
3216 | key.offset = num_bytes; | ||
3217 | |||
3218 | ret = btrfs_search_slot(trans, extent_root, &key, path, | ||
3219 | -1, 1); | ||
3220 | if (ret) { | ||
3221 | printk(KERN_ERR "umm, got %d back from search" | ||
3222 | ", was looking for %llu\n", ret, | ||
3223 | (unsigned long long)bytenr); | ||
3224 | btrfs_print_leaf(extent_root, path->nodes[0]); | ||
3225 | } | ||
3226 | BUG_ON(ret); | ||
3227 | extent_slot = path->slots[0]; | ||
3228 | leaf = path->nodes[0]; | ||
3229 | item_size = btrfs_item_size_nr(leaf, extent_slot); | ||
3230 | } | ||
3231 | #endif | ||
3232 | BUG_ON(item_size < sizeof(*ei)); | ||
2319 | ei = btrfs_item_ptr(leaf, extent_slot, | 3233 | ei = btrfs_item_ptr(leaf, extent_slot, |
2320 | struct btrfs_extent_item); | 3234 | struct btrfs_extent_item); |
2321 | refs = btrfs_extent_refs(leaf, ei); | 3235 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { |
2322 | 3236 | struct btrfs_tree_block_info *bi; | |
2323 | /* | 3237 | BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); |
2324 | * we're not allowed to delete the extent item if there | 3238 | bi = (struct btrfs_tree_block_info *)(ei + 1); |
2325 | * are other delayed ref updates pending | 3239 | WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); |
2326 | */ | 3240 | } |
2327 | 3241 | ||
3242 | refs = btrfs_extent_refs(leaf, ei); | ||
2328 | BUG_ON(refs < refs_to_drop); | 3243 | BUG_ON(refs < refs_to_drop); |
2329 | refs -= refs_to_drop; | 3244 | refs -= refs_to_drop; |
2330 | btrfs_set_extent_refs(leaf, ei, refs); | ||
2331 | btrfs_mark_buffer_dirty(leaf); | ||
2332 | 3245 | ||
2333 | if (refs == 0 && found_extent && | 3246 | if (refs > 0) { |
2334 | path->slots[0] == extent_slot + 1) { | 3247 | if (extent_op) |
2335 | struct btrfs_extent_ref *ref; | 3248 | __run_delayed_extent_op(extent_op, leaf, ei); |
2336 | ref = btrfs_item_ptr(leaf, path->slots[0], | 3249 | /* |
2337 | struct btrfs_extent_ref); | 3250 | * In the case of inline back ref, reference count will |
2338 | BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop); | 3251 | * be updated by remove_extent_backref |
2339 | /* if the back ref and the extent are next to each other | ||
2340 | * they get deleted below in one shot | ||
2341 | */ | 3252 | */ |
2342 | path->slots[0] = extent_slot; | 3253 | if (iref) { |
2343 | num_to_del = 2; | 3254 | BUG_ON(!found_extent); |
2344 | } else if (found_extent) { | 3255 | } else { |
2345 | /* otherwise delete the extent back ref */ | 3256 | btrfs_set_extent_refs(leaf, ei, refs); |
2346 | ret = remove_extent_backref(trans, extent_root, path, | 3257 | btrfs_mark_buffer_dirty(leaf); |
2347 | refs_to_drop); | 3258 | } |
2348 | BUG_ON(ret); | 3259 | if (found_extent) { |
2349 | /* if refs are 0, we need to setup the path for deletion */ | 3260 | ret = remove_extent_backref(trans, extent_root, path, |
2350 | if (refs == 0) { | 3261 | iref, refs_to_drop, |
2351 | btrfs_release_path(extent_root, path); | 3262 | is_data); |
2352 | path->leave_spinning = 1; | ||
2353 | ret = btrfs_search_slot(trans, extent_root, &key, path, | ||
2354 | -1, 1); | ||
2355 | BUG_ON(ret); | 3263 | BUG_ON(ret); |
2356 | } | 3264 | } |
2357 | } | 3265 | } else { |
2358 | 3266 | int mark_free = 0; | |
2359 | if (refs == 0) { | ||
2360 | u64 super_used; | ||
2361 | u64 root_used; | ||
2362 | struct extent_buffer *must_clean = NULL; | 3267 | struct extent_buffer *must_clean = NULL; |
2363 | 3268 | ||
2364 | if (pin) { | 3269 | if (found_extent) { |
2365 | ret = pin_down_bytes(trans, root, path, | 3270 | BUG_ON(is_data && refs_to_drop != |
2366 | bytenr, num_bytes, | 3271 | extent_data_ref_count(root, path, iref)); |
2367 | owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, | 3272 | if (iref) { |
2368 | &must_clean); | 3273 | BUG_ON(path->slots[0] != extent_slot); |
2369 | if (ret > 0) | 3274 | } else { |
2370 | mark_free = 1; | 3275 | BUG_ON(path->slots[0] != extent_slot + 1); |
2371 | BUG_ON(ret < 0); | 3276 | path->slots[0] = extent_slot; |
3277 | num_to_del = 2; | ||
3278 | } | ||
2372 | } | 3279 | } |
2373 | 3280 | ||
2374 | /* block accounting for super block */ | 3281 | ret = pin_down_bytes(trans, root, path, bytenr, |
2375 | spin_lock(&info->delalloc_lock); | 3282 | num_bytes, is_data, &must_clean); |
2376 | super_used = btrfs_super_bytes_used(&info->super_copy); | 3283 | if (ret > 0) |
2377 | btrfs_set_super_bytes_used(&info->super_copy, | 3284 | mark_free = 1; |
2378 | super_used - num_bytes); | 3285 | BUG_ON(ret < 0); |
2379 | |||
2380 | /* block accounting for root item */ | ||
2381 | root_used = btrfs_root_used(&root->root_item); | ||
2382 | btrfs_set_root_used(&root->root_item, | ||
2383 | root_used - num_bytes); | ||
2384 | spin_unlock(&info->delalloc_lock); | ||
2385 | |||
2386 | /* | 3286 | /* |
2387 | * it is going to be very rare for someone to be waiting | 3287 | * it is going to be very rare for someone to be waiting |
2388 | * on the block we're freeing. del_items might need to | 3288 | * on the block we're freeing. del_items might need to |
@@ -2403,7 +3303,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2403 | free_extent_buffer(must_clean); | 3303 | free_extent_buffer(must_clean); |
2404 | } | 3304 | } |
2405 | 3305 | ||
2406 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 3306 | if (is_data) { |
2407 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 3307 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
2408 | BUG_ON(ret); | 3308 | BUG_ON(ret); |
2409 | } else { | 3309 | } else { |
@@ -2421,34 +3321,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2421 | } | 3321 | } |
2422 | 3322 | ||
2423 | /* | 3323 | /* |
2424 | * remove an extent from the root, returns 0 on success | ||
2425 | */ | ||
2426 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | ||
2427 | struct btrfs_root *root, | ||
2428 | u64 bytenr, u64 num_bytes, u64 parent, | ||
2429 | u64 root_objectid, u64 ref_generation, | ||
2430 | u64 owner_objectid, int pin, | ||
2431 | int refs_to_drop) | ||
2432 | { | ||
2433 | WARN_ON(num_bytes < root->sectorsize); | ||
2434 | |||
2435 | /* | ||
2436 | * if metadata always pin | ||
2437 | * if data pin when any transaction has committed this | ||
2438 | */ | ||
2439 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID || | ||
2440 | ref_generation != trans->transid) | ||
2441 | pin = 1; | ||
2442 | |||
2443 | if (ref_generation != trans->transid) | ||
2444 | pin = 1; | ||
2445 | |||
2446 | return __free_extent(trans, root, bytenr, num_bytes, parent, | ||
2447 | root_objectid, ref_generation, | ||
2448 | owner_objectid, pin, pin == 0, refs_to_drop); | ||
2449 | } | ||
2450 | |||
2451 | /* | ||
2452 | * when we free an extent, it is possible (and likely) that we free the last | 3324 | * when we free an extent, it is possible (and likely) that we free the last |
2453 | * delayed ref for that extent as well. This searches the delayed ref tree for | 3325 | * delayed ref for that extent as well. This searches the delayed ref tree for |
2454 | * a given extent, and if there are no other delayed refs to be processed, it | 3326 | * a given extent, and if there are no other delayed refs to be processed, it |
@@ -2479,6 +3351,13 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
2479 | if (ref->bytenr == bytenr) | 3351 | if (ref->bytenr == bytenr) |
2480 | goto out; | 3352 | goto out; |
2481 | 3353 | ||
3354 | if (head->extent_op) { | ||
3355 | if (!head->must_insert_reserved) | ||
3356 | goto out; | ||
3357 | kfree(head->extent_op); | ||
3358 | head->extent_op = NULL; | ||
3359 | } | ||
3360 | |||
2482 | /* | 3361 | /* |
2483 | * waiting for the lock here would deadlock. If someone else has it | 3362 | * waiting for the lock here would deadlock. If someone else has it |
2484 | * locked they are already in the process of dropping it anyway | 3363 | * locked they are already in the process of dropping it anyway |
@@ -2507,7 +3386,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
2507 | spin_unlock(&delayed_refs->lock); | 3386 | spin_unlock(&delayed_refs->lock); |
2508 | 3387 | ||
2509 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | 3388 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, |
2510 | &head->node, head->must_insert_reserved); | 3389 | &head->node, head->extent_op, |
3390 | head->must_insert_reserved); | ||
2511 | BUG_ON(ret); | 3391 | BUG_ON(ret); |
2512 | btrfs_put_delayed_ref(&head->node); | 3392 | btrfs_put_delayed_ref(&head->node); |
2513 | return 0; | 3393 | return 0; |
@@ -2519,32 +3399,32 @@ out: | |||
2519 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 3399 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
2520 | struct btrfs_root *root, | 3400 | struct btrfs_root *root, |
2521 | u64 bytenr, u64 num_bytes, u64 parent, | 3401 | u64 bytenr, u64 num_bytes, u64 parent, |
2522 | u64 root_objectid, u64 ref_generation, | 3402 | u64 root_objectid, u64 owner, u64 offset) |
2523 | u64 owner_objectid, int pin) | ||
2524 | { | 3403 | { |
2525 | int ret; | 3404 | int ret; |
2526 | 3405 | ||
2527 | /* | 3406 | /* |
2528 | * tree log blocks never actually go into the extent allocation | 3407 | * tree log blocks never actually go into the extent allocation |
2529 | * tree, just update pinning info and exit early. | 3408 | * tree, just update pinning info and exit early. |
2530 | * | ||
2531 | * data extents referenced by the tree log do need to have | ||
2532 | * their reference counts bumped. | ||
2533 | */ | 3409 | */ |
2534 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && | 3410 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
2535 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | 3411 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
2536 | /* unlocks the pinned mutex */ | 3412 | /* unlocks the pinned mutex */ |
2537 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3413 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); |
2538 | update_reserved_extents(root, bytenr, num_bytes, 0); | 3414 | update_reserved_extents(root, bytenr, num_bytes, 0); |
2539 | ret = 0; | 3415 | ret = 0; |
2540 | } else { | 3416 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
2541 | ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, | 3417 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, |
2542 | root_objectid, ref_generation, | 3418 | parent, root_objectid, (int)owner, |
2543 | owner_objectid, | 3419 | BTRFS_DROP_DELAYED_REF, NULL); |
2544 | BTRFS_DROP_DELAYED_REF, 1); | ||
2545 | BUG_ON(ret); | 3420 | BUG_ON(ret); |
2546 | ret = check_ref_cleanup(trans, root, bytenr); | 3421 | ret = check_ref_cleanup(trans, root, bytenr); |
2547 | BUG_ON(ret); | 3422 | BUG_ON(ret); |
3423 | } else { | ||
3424 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | ||
3425 | parent, root_objectid, owner, | ||
3426 | offset, BTRFS_DROP_DELAYED_REF, NULL); | ||
3427 | BUG_ON(ret); | ||
2548 | } | 3428 | } |
2549 | return ret; | 3429 | return ret; |
2550 | } | 3430 | } |
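Freeing now fans out three ways: log-tree blocks never touch the extent tree and are just pinned; metadata (owner below BTRFS_FIRST_FREE_OBJECTID, i.e. the owner field carries a tree level) queues a tree-ref drop and then tries check_ref_cleanup() to short-circuit the common free-what-was-just-allocated case; data queues a data-ref drop for the normal delayed-ref run. A toy classifier of the same decision:

enum free_path { PIN_ONLY, DELAYED_TREE_REF, DELAYED_DATA_REF };

/* first_free stands in for BTRFS_FIRST_FREE_OBJECTID (256 in headers of
 * this vintage, if memory serves): smaller owners are levels, not inodes. */
enum free_path classify_free(unsigned long long root_objectid,
                             unsigned long long owner,
                             unsigned long long tree_log_id,
                             unsigned long long first_free)
{
        if (root_objectid == tree_log_id)
                return PIN_ONLY;
        return owner < first_free ? DELAYED_TREE_REF : DELAYED_DATA_REF;
}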
@@ -2719,7 +3599,7 @@ refill_cluster: | |||
2719 | last_ptr_loop = 0; | 3599 | last_ptr_loop = 0; |
2720 | 3600 | ||
2721 | /* allocate a cluster in this block group */ | 3601 | /* allocate a cluster in this block group */ |
2722 | ret = btrfs_find_space_cluster(trans, | 3602 | ret = btrfs_find_space_cluster(trans, root, |
2723 | block_group, last_ptr, | 3603 | block_group, last_ptr, |
2724 | offset, num_bytes, | 3604 | offset, num_bytes, |
2725 | empty_cluster + empty_size); | 3605 | empty_cluster + empty_size); |
@@ -2969,99 +3849,147 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
2969 | return ret; | 3849 | return ret; |
2970 | } | 3850 | } |
2971 | 3851 | ||
2972 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | 3852 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
2973 | struct btrfs_root *root, u64 parent, | 3853 | struct btrfs_root *root, |
2974 | u64 root_objectid, u64 ref_generation, | 3854 | u64 parent, u64 root_objectid, |
2975 | u64 owner, struct btrfs_key *ins, | 3855 | u64 flags, u64 owner, u64 offset, |
2976 | int ref_mod) | 3856 | struct btrfs_key *ins, int ref_mod) |
2977 | { | 3857 | { |
2978 | int ret; | 3858 | int ret; |
2979 | u64 super_used; | 3859 | struct btrfs_fs_info *fs_info = root->fs_info; |
2980 | u64 root_used; | ||
2981 | u64 num_bytes = ins->offset; | ||
2982 | u32 sizes[2]; | ||
2983 | struct btrfs_fs_info *info = root->fs_info; | ||
2984 | struct btrfs_root *extent_root = info->extent_root; | ||
2985 | struct btrfs_extent_item *extent_item; | 3860 | struct btrfs_extent_item *extent_item; |
2986 | struct btrfs_extent_ref *ref; | 3861 | struct btrfs_extent_inline_ref *iref; |
2987 | struct btrfs_path *path; | 3862 | struct btrfs_path *path; |
2988 | struct btrfs_key keys[2]; | 3863 | struct extent_buffer *leaf; |
2989 | 3864 | int type; | |
2990 | if (parent == 0) | 3865 | u32 size; |
2991 | parent = ins->objectid; | ||
2992 | |||
2993 | /* block accounting for super block */ | ||
2994 | spin_lock(&info->delalloc_lock); | ||
2995 | super_used = btrfs_super_bytes_used(&info->super_copy); | ||
2996 | btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); | ||
2997 | 3866 | ||
2998 | /* block accounting for root item */ | 3867 | if (parent > 0) |
2999 | root_used = btrfs_root_used(&root->root_item); | 3868 | type = BTRFS_SHARED_DATA_REF_KEY; |
3000 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); | 3869 | else |
3001 | spin_unlock(&info->delalloc_lock); | 3870 | type = BTRFS_EXTENT_DATA_REF_KEY; |
3002 | 3871 | ||
3003 | memcpy(&keys[0], ins, sizeof(*ins)); | 3872 | size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); |
3004 | keys[1].objectid = ins->objectid; | ||
3005 | keys[1].type = BTRFS_EXTENT_REF_KEY; | ||
3006 | keys[1].offset = parent; | ||
3007 | sizes[0] = sizeof(*extent_item); | ||
3008 | sizes[1] = sizeof(*ref); | ||
3009 | 3873 | ||
3010 | path = btrfs_alloc_path(); | 3874 | path = btrfs_alloc_path(); |
3011 | BUG_ON(!path); | 3875 | BUG_ON(!path); |
3012 | 3876 | ||
3013 | path->leave_spinning = 1; | 3877 | path->leave_spinning = 1; |
3014 | ret = btrfs_insert_empty_items(trans, extent_root, path, keys, | 3878 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
3015 | sizes, 2); | 3879 | ins, size); |
3016 | BUG_ON(ret); | 3880 | BUG_ON(ret); |
3017 | 3881 | ||
3018 | extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3882 | leaf = path->nodes[0]; |
3883 | extent_item = btrfs_item_ptr(leaf, path->slots[0], | ||
3019 | struct btrfs_extent_item); | 3884 | struct btrfs_extent_item); |
3020 | btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod); | 3885 | btrfs_set_extent_refs(leaf, extent_item, ref_mod); |
3021 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | 3886 | btrfs_set_extent_generation(leaf, extent_item, trans->transid); |
3022 | struct btrfs_extent_ref); | 3887 | btrfs_set_extent_flags(leaf, extent_item, |
3023 | 3888 | flags | BTRFS_EXTENT_FLAG_DATA); | |
3024 | btrfs_set_ref_root(path->nodes[0], ref, root_objectid); | 3889 | |
3025 | btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); | 3890 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); |
3026 | btrfs_set_ref_objectid(path->nodes[0], ref, owner); | 3891 | btrfs_set_extent_inline_ref_type(leaf, iref, type); |
3027 | btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod); | 3892 | if (parent > 0) { |
3893 | struct btrfs_shared_data_ref *ref; | ||
3894 | ref = (struct btrfs_shared_data_ref *)(iref + 1); | ||
3895 | btrfs_set_extent_inline_ref_offset(leaf, iref, parent); | ||
3896 | btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); | ||
3897 | } else { | ||
3898 | struct btrfs_extent_data_ref *ref; | ||
3899 | ref = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
3900 | btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); | ||
3901 | btrfs_set_extent_data_ref_objectid(leaf, ref, owner); | ||
3902 | btrfs_set_extent_data_ref_offset(leaf, ref, offset); | ||
3903 | btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); | ||
3904 | } | ||
3028 | 3905 | ||
3029 | btrfs_mark_buffer_dirty(path->nodes[0]); | 3906 | btrfs_mark_buffer_dirty(path->nodes[0]); |
3030 | |||
3031 | trans->alloc_exclude_start = 0; | ||
3032 | trans->alloc_exclude_nr = 0; | ||
3033 | btrfs_free_path(path); | 3907 | btrfs_free_path(path); |
3034 | 3908 | ||
3035 | if (ret) | 3909 | ret = update_block_group(trans, root, ins->objectid, ins->offset, |
3036 | goto out; | 3910 | 1, 0); |
3037 | |||
3038 | ret = update_block_group(trans, root, ins->objectid, | ||
3039 | ins->offset, 1, 0); | ||
3040 | if (ret) { | 3911 | if (ret) { |
3041 | printk(KERN_ERR "btrfs update block group failed for %llu " | 3912 | printk(KERN_ERR "btrfs update block group failed for %llu " |
3042 | "%llu\n", (unsigned long long)ins->objectid, | 3913 | "%llu\n", (unsigned long long)ins->objectid, |
3043 | (unsigned long long)ins->offset); | 3914 | (unsigned long long)ins->offset); |
3044 | BUG(); | 3915 | BUG(); |
3045 | } | 3916 | } |
3046 | out: | ||
3047 | return ret; | 3917 | return ret; |
3048 | } | 3918 | } |
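The item written by alloc_reserved_file_extent() is the heart of the new format: a btrfs_extent_item immediately followed by one inline backref, with no separate ref items. For a shared ref the parent bytenr lives in the inline ref's offset field with a bare count after it; for a keyed ref the btrfs_extent_data_ref body overlays the offset field entirely. A standalone size check under that reading (the field widths are my assumptions about the disk format; packed structs stand in for the leaf data):

#include <stdint.h>
#include <stdio.h>

struct extent_item { uint64_t refs, generation, flags; } __attribute__((packed));
/* shared ref: parent bytenr in iref->offset, then a bare count */
struct shared_body { uint64_t parent; uint32_t count; } __attribute__((packed));
/* keyed ref: the extent_data_ref body replaces iref->offset entirely */
struct keyed_body { uint64_t root, objectid, offset; uint32_t count; } __attribute__((packed));

int main(void)
{
        /* one type byte precedes either ref body */
        printf("shared item: %zu bytes\n",
               sizeof(struct extent_item) + 1 + sizeof(struct shared_body));
        printf("keyed item:  %zu bytes\n",
               sizeof(struct extent_item) + 1 + sizeof(struct keyed_body));
        return 0;   /* 37 and 53 under these widths */
}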
3049 | 3919 | ||
3050 | int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | 3920 | static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, |
3051 | struct btrfs_root *root, u64 parent, | 3921 | struct btrfs_root *root, |
3052 | u64 root_objectid, u64 ref_generation, | 3922 | u64 parent, u64 root_objectid, |
3053 | u64 owner, struct btrfs_key *ins) | 3923 | u64 flags, struct btrfs_disk_key *key, |
3924 | int level, struct btrfs_key *ins) | ||
3054 | { | 3925 | { |
3055 | int ret; | 3926 | int ret; |
3927 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3928 | struct btrfs_extent_item *extent_item; | ||
3929 | struct btrfs_tree_block_info *block_info; | ||
3930 | struct btrfs_extent_inline_ref *iref; | ||
3931 | struct btrfs_path *path; | ||
3932 | struct extent_buffer *leaf; | ||
3933 | u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); | ||
3056 | 3934 | ||
3057 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) | 3935 | path = btrfs_alloc_path(); |
3058 | return 0; | 3936 | BUG_ON(!path); |
3059 | 3937 | ||
3060 | ret = btrfs_add_delayed_ref(trans, ins->objectid, | 3938 | path->leave_spinning = 1; |
3061 | ins->offset, parent, root_objectid, | 3939 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
3062 | ref_generation, owner, | 3940 | ins, size); |
3063 | BTRFS_ADD_DELAYED_EXTENT, 0); | ||
3064 | BUG_ON(ret); | 3941 | BUG_ON(ret); |
3942 | |||
3943 | leaf = path->nodes[0]; | ||
3944 | extent_item = btrfs_item_ptr(leaf, path->slots[0], | ||
3945 | struct btrfs_extent_item); | ||
3946 | btrfs_set_extent_refs(leaf, extent_item, 1); | ||
3947 | btrfs_set_extent_generation(leaf, extent_item, trans->transid); | ||
3948 | btrfs_set_extent_flags(leaf, extent_item, | ||
3949 | flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); | ||
3950 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); | ||
3951 | |||
3952 | btrfs_set_tree_block_key(leaf, block_info, key); | ||
3953 | btrfs_set_tree_block_level(leaf, block_info, level); | ||
3954 | |||
3955 | iref = (struct btrfs_extent_inline_ref *)(block_info + 1); | ||
3956 | if (parent > 0) { | ||
3957 | BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); | ||
3958 | btrfs_set_extent_inline_ref_type(leaf, iref, | ||
3959 | BTRFS_SHARED_BLOCK_REF_KEY); | ||
3960 | btrfs_set_extent_inline_ref_offset(leaf, iref, parent); | ||
3961 | } else { | ||
3962 | btrfs_set_extent_inline_ref_type(leaf, iref, | ||
3963 | BTRFS_TREE_BLOCK_REF_KEY); | ||
3964 | btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); | ||
3965 | } | ||
3966 | |||
3967 | btrfs_mark_buffer_dirty(leaf); | ||
3968 | btrfs_free_path(path); | ||
3969 | |||
3970 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | ||
3971 | 1, 0); | ||
3972 | if (ret) { | ||
3973 | printk(KERN_ERR "btrfs update block group failed for %llu " | ||
3974 | "%llu\n", (unsigned long long)ins->objectid, | ||
3975 | (unsigned long long)ins->offset); | ||
3976 | BUG(); | ||
3977 | } | ||
3978 | return ret; | ||
3979 | } | ||
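alloc_reserved_tree_block() writes the metadata variant: one extra structure, btrfs_tree_block_info, sits between the extent item and the inline ref, carrying the block's first key and its level. Laid out flat (my notation, not an on-disk dump):

  [btrfs_extent_item]      refs | generation | flags (TREE_BLOCK [+ FULL_BACKREF])
  [btrfs_tree_block_info]  first disk key | level
  [inline ref]             SHARED_BLOCK_REF keyed by the parent bytenr, or
                           TREE_BLOCK_REF keyed by the owning root's objectid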
3980 | |||
3981 | int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | ||
3982 | struct btrfs_root *root, | ||
3983 | u64 root_objectid, u64 owner, | ||
3984 | u64 offset, struct btrfs_key *ins) | ||
3985 | { | ||
3986 | int ret; | ||
3987 | |||
3988 | BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); | ||
3989 | |||
3990 | ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, | ||
3991 | 0, root_objectid, owner, offset, | ||
3992 | BTRFS_ADD_DELAYED_EXTENT, NULL); | ||
3065 | return ret; | 3993 | return ret; |
3066 | } | 3994 | } |
3067 | 3995 | ||
@@ -3070,10 +3998,10 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3070 | * an extent has been allocated and makes sure to clear the free | 3998 | * an extent has been allocated and makes sure to clear the free |
3071 | * space cache bits as well | 3999 | * space cache bits as well |
3072 | */ | 4000 | */ |
3073 | int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | 4001 | int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, |
3074 | struct btrfs_root *root, u64 parent, | 4002 | struct btrfs_root *root, |
3075 | u64 root_objectid, u64 ref_generation, | 4003 | u64 root_objectid, u64 owner, u64 offset, |
3076 | u64 owner, struct btrfs_key *ins) | 4004 | struct btrfs_key *ins) |
3077 | { | 4005 | { |
3078 | int ret; | 4006 | int ret; |
3079 | struct btrfs_block_group_cache *block_group; | 4007 | struct btrfs_block_group_cache *block_group; |
@@ -3087,8 +4015,8 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | |||
3087 | ins->offset); | 4015 | ins->offset); |
3088 | BUG_ON(ret); | 4016 | BUG_ON(ret); |
3089 | btrfs_put_block_group(block_group); | 4017 | btrfs_put_block_group(block_group); |
3090 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | 4018 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
3091 | ref_generation, owner, ins, 1); | 4019 | 0, owner, offset, ins, 1); |
3092 | return ret; | 4020 | return ret; |
3093 | } | 4021 | } |
3094 | 4022 | ||
@@ -3099,26 +4027,48 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | |||
3099 | * | 4027 | * |
3100 | * returns 0 if everything worked, non-zero otherwise. | 4028 | * returns 0 if everything worked, non-zero otherwise. |
3101 | */ | 4029 | */ |
3102 | int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | 4030 | static int alloc_tree_block(struct btrfs_trans_handle *trans, |
3103 | struct btrfs_root *root, | 4031 | struct btrfs_root *root, |
3104 | u64 num_bytes, u64 parent, u64 min_alloc_size, | 4032 | u64 num_bytes, u64 parent, u64 root_objectid, |
3105 | u64 root_objectid, u64 ref_generation, | 4033 | struct btrfs_disk_key *key, int level, |
3106 | u64 owner_objectid, u64 empty_size, u64 hint_byte, | 4034 | u64 empty_size, u64 hint_byte, u64 search_end, |
3107 | u64 search_end, struct btrfs_key *ins, u64 data) | 4035 | struct btrfs_key *ins) |
3108 | { | 4036 | { |
3109 | int ret; | 4037 | int ret; |
3110 | ret = __btrfs_reserve_extent(trans, root, num_bytes, | 4038 | u64 flags = 0; |
3111 | min_alloc_size, empty_size, hint_byte, | 4039 | |
3112 | search_end, ins, data); | 4040 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
4041 | empty_size, hint_byte, search_end, | ||
4042 | ins, 0); | ||
3113 | BUG_ON(ret); | 4043 | BUG_ON(ret); |
4044 | |||
4045 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
4046 | if (parent == 0) | ||
4047 | parent = ins->objectid; | ||
4048 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
4049 | } else | ||
4050 | BUG_ON(parent > 0); | ||
4051 | |||
4052 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
3114 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 4053 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
3115 | ret = btrfs_add_delayed_ref(trans, ins->objectid, | 4054 | struct btrfs_delayed_extent_op *extent_op; |
3116 | ins->offset, parent, root_objectid, | 4055 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); |
3117 | ref_generation, owner_objectid, | 4056 | BUG_ON(!extent_op); |
3118 | BTRFS_ADD_DELAYED_EXTENT, 0); | 4057 | if (key) |
4058 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
4059 | else | ||
4060 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
4061 | extent_op->flags_to_set = flags; | ||
4062 | extent_op->update_key = 1; | ||
4063 | extent_op->update_flags = 1; | ||
4064 | extent_op->is_data = 0; | ||
4065 | |||
4066 | ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, | ||
4067 | ins->offset, parent, root_objectid, | ||
4068 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
4069 | extent_op); | ||
3119 | BUG_ON(ret); | 4070 | BUG_ON(ret); |
3120 | } | 4071 | } |
3121 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
3122 | return ret; | 4072 | return ret; |
3123 | } | 4073 | } |
3124 | 4074 | ||
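The block above is the heart of the change: instead of inserting the extent item immediately, alloc_tree_block() queues a btrfs_delayed_extent_op carrying the key and flags to apply when the delayed ref is eventually run. Distilled into a standalone helper — a sketch that returns -ENOMEM where the hunk uses BUG_ON:

static int queue_tree_block_ref(struct btrfs_trans_handle *trans,
				struct btrfs_key *ins,
				struct btrfs_disk_key *key,
				u64 parent, u64 root_objectid, int level,
				u64 flags)
{
	struct btrfs_delayed_extent_op *op;

	op = kmalloc(sizeof(*op), GFP_NOFS);
	if (!op)
		return -ENOMEM;
	if (key)
		memcpy(&op->key, key, sizeof(op->key));
	else
		memset(&op->key, 0, sizeof(op->key));
	op->flags_to_set = flags;	/* e.g. BTRFS_BLOCK_FLAG_FULL_BACKREF */
	op->update_key = 1;
	op->update_flags = 1;
	op->is_data = 0;

	/* the key and flags are applied when this delayed ref is run */
	return btrfs_add_delayed_tree_ref(trans, ins->objectid, ins->offset,
					  parent, root_objectid, level,
					  BTRFS_ADD_DELAYED_EXTENT, op);
}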
@@ -3157,21 +4107,17 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
3157 | * returns the tree buffer or NULL. | 4107 | * returns the tree buffer or NULL. |
3158 | */ | 4108 | */ |
3159 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 4109 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, |
3160 | struct btrfs_root *root, | 4110 | struct btrfs_root *root, u32 blocksize, |
3161 | u32 blocksize, u64 parent, | 4111 | u64 parent, u64 root_objectid, |
3162 | u64 root_objectid, | 4112 | struct btrfs_disk_key *key, int level, |
3163 | u64 ref_generation, | 4113 | u64 hint, u64 empty_size) |
3164 | int level, | ||
3165 | u64 hint, | ||
3166 | u64 empty_size) | ||
3167 | { | 4114 | { |
3168 | struct btrfs_key ins; | 4115 | struct btrfs_key ins; |
3169 | int ret; | 4116 | int ret; |
3170 | struct extent_buffer *buf; | 4117 | struct extent_buffer *buf; |
3171 | 4118 | ||
3172 | ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, | 4119 | ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, |
3173 | root_objectid, ref_generation, level, | 4120 | key, level, empty_size, hint, (u64)-1, &ins); |
3174 | empty_size, hint, (u64)-1, &ins, 0); | ||
3175 | if (ret) { | 4121 | if (ret) { |
3176 | BUG_ON(ret > 0); | 4122 | BUG_ON(ret > 0); |
3177 | return ERR_PTR(ret); | 4123 | return ERR_PTR(ret); |
@@ -3185,32 +4131,19 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
3185 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4131 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
3186 | struct btrfs_root *root, struct extent_buffer *leaf) | 4132 | struct btrfs_root *root, struct extent_buffer *leaf) |
3187 | { | 4133 | { |
3188 | u64 leaf_owner; | 4134 | u64 disk_bytenr; |
3189 | u64 leaf_generation; | 4135 | u64 num_bytes; |
3190 | struct refsort *sorted; | ||
3191 | struct btrfs_key key; | 4136 | struct btrfs_key key; |
3192 | struct btrfs_file_extent_item *fi; | 4137 | struct btrfs_file_extent_item *fi; |
4138 | u32 nritems; | ||
3193 | int i; | 4139 | int i; |
3194 | int nritems; | ||
3195 | int ret; | 4140 | int ret; |
3196 | int refi = 0; | ||
3197 | int slot; | ||
3198 | 4141 | ||
3199 | BUG_ON(!btrfs_is_leaf(leaf)); | 4142 | BUG_ON(!btrfs_is_leaf(leaf)); |
3200 | nritems = btrfs_header_nritems(leaf); | 4143 | nritems = btrfs_header_nritems(leaf); |
3201 | leaf_owner = btrfs_header_owner(leaf); | ||
3202 | leaf_generation = btrfs_header_generation(leaf); | ||
3203 | 4144 | ||
3204 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
3205 | /* we do this loop twice. The first time we build a list | ||
3206 | * of the extents we have a reference on, then we sort the list | ||
3207 | * by bytenr. The second time around we actually do the | ||
3208 | * extent freeing. | ||
3209 | */ | ||
3210 | for (i = 0; i < nritems; i++) { | 4145 | for (i = 0; i < nritems; i++) { |
3211 | u64 disk_bytenr; | ||
3212 | cond_resched(); | 4146 | cond_resched(); |
3213 | |||
3214 | btrfs_item_key_to_cpu(leaf, &key, i); | 4147 | btrfs_item_key_to_cpu(leaf, &key, i); |
3215 | 4148 | ||
3216 | /* only extents have references, skip everything else */ | 4149 | /* only extents have references, skip everything else */ |
@@ -3230,45 +4163,16 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3230 | if (disk_bytenr == 0) | 4163 | if (disk_bytenr == 0) |
3231 | continue; | 4164 | continue; |
3232 | 4165 | ||
3233 | sorted[refi].bytenr = disk_bytenr; | 4166 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); |
3234 | sorted[refi].slot = i; | 4167 | ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, |
3235 | refi++; | 4168 | leaf->start, 0, key.objectid, 0); |
3236 | } | ||
3237 | |||
3238 | if (refi == 0) | ||
3239 | goto out; | ||
3240 | |||
3241 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | ||
3242 | |||
3243 | for (i = 0; i < refi; i++) { | ||
3244 | u64 disk_bytenr; | ||
3245 | |||
3246 | disk_bytenr = sorted[i].bytenr; | ||
3247 | slot = sorted[i].slot; | ||
3248 | |||
3249 | cond_resched(); | ||
3250 | |||
3251 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
3252 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
3253 | continue; | ||
3254 | |||
3255 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
3256 | |||
3257 | ret = btrfs_free_extent(trans, root, disk_bytenr, | ||
3258 | btrfs_file_extent_disk_num_bytes(leaf, fi), | ||
3259 | leaf->start, leaf_owner, leaf_generation, | ||
3260 | key.objectid, 0); | ||
3261 | BUG_ON(ret); | 4169 | BUG_ON(ret); |
3262 | |||
3263 | atomic_inc(&root->fs_info->throttle_gen); | ||
3264 | wake_up(&root->fs_info->transaction_throttle); | ||
3265 | cond_resched(); | ||
3266 | } | 4170 | } |
3267 | out: | ||
3268 | kfree(sorted); | ||
3269 | return 0; | 4171 | return 0; |
3270 | } | 4172 | } |
3271 | 4173 | ||
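The refsort two-pass (collect, sort by bytenr, then free) is gone because btrfs_free_extent() now only records a delayed ref; ordering is handled when the delayed refs are run. What remains per file extent item is roughly the following — a sketch with an assumed helper name, matching the arguments used in the hunk (parent hint leaf->start, root objectid 0, owner key.objectid):

static int drop_one_data_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *leaf,
			     struct btrfs_key *key,
			     struct btrfs_file_extent_item *fi)
{
	u64 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	u64 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);

	if (bytenr == 0)	/* a hole: nothing allocated to free */
		return 0;
	return btrfs_free_extent(trans, root, bytenr, bytes,
				 leaf->start, 0, key->objectid, 0);
}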
4174 | #if 0 | ||
4175 | |||
3272 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4176 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, |
3273 | struct btrfs_root *root, | 4177 | struct btrfs_root *root, |
3274 | struct btrfs_leaf_ref *ref) | 4178 | struct btrfs_leaf_ref *ref) |
@@ -3311,13 +4215,14 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3311 | return 0; | 4215 | return 0; |
3312 | } | 4216 | } |
3313 | 4217 | ||
4218 | |||
3314 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, | 4219 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, |
3315 | struct btrfs_root *root, u64 start, | 4220 | struct btrfs_root *root, u64 start, |
3316 | u64 len, u32 *refs) | 4221 | u64 len, u32 *refs) |
3317 | { | 4222 | { |
3318 | int ret; | 4223 | int ret; |
3319 | 4224 | ||
3320 | ret = btrfs_lookup_extent_ref(trans, root, start, len, refs); | 4225 | ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); |
3321 | BUG_ON(ret); | 4226 | BUG_ON(ret); |
3322 | 4227 | ||
3323 | #if 0 /* some debugging code in case we see problems here */ | 4228 | #if 0 /* some debugging code in case we see problems here */ |
@@ -3352,6 +4257,7 @@ static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, | |||
3352 | return ret; | 4257 | return ret; |
3353 | } | 4258 | } |
3354 | 4259 | ||
4260 | |||
3355 | /* | 4261 | /* |
3356 | * this is used while deleting old snapshots, and it drops the refs | 4262 | * this is used while deleting old snapshots, and it drops the refs |
3357 | * on a whole subtree starting from a level 1 node. | 4263 | * on a whole subtree starting from a level 1 node. |
@@ -3645,32 +4551,36 @@ out: | |||
3645 | cond_resched(); | 4551 | cond_resched(); |
3646 | return 0; | 4552 | return 0; |
3647 | } | 4553 | } |
4554 | #endif | ||
3648 | 4555 | ||
3649 | /* | 4556 | /* |
3650 | * helper function for drop_subtree, this function is similar to | 4557 | * helper function for drop_subtree, this function is similar to |
3651 | * walk_down_tree. The main difference is that it checks reference | 4558 | * walk_down_tree. The main difference is that it checks reference |
3652 | * counts while tree blocks are locked. | 4559 | * counts while tree blocks are locked. |
3653 | */ | 4560 | */ |
3654 | static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, | 4561 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, |
3655 | struct btrfs_root *root, | 4562 | struct btrfs_root *root, |
3656 | struct btrfs_path *path, int *level) | 4563 | struct btrfs_path *path, int *level) |
3657 | { | 4564 | { |
3658 | struct extent_buffer *next; | 4565 | struct extent_buffer *next; |
3659 | struct extent_buffer *cur; | 4566 | struct extent_buffer *cur; |
3660 | struct extent_buffer *parent; | 4567 | struct extent_buffer *parent; |
3661 | u64 bytenr; | 4568 | u64 bytenr; |
3662 | u64 ptr_gen; | 4569 | u64 ptr_gen; |
4570 | u64 refs; | ||
4571 | u64 flags; | ||
3663 | u32 blocksize; | 4572 | u32 blocksize; |
3664 | u32 refs; | ||
3665 | int ret; | 4573 | int ret; |
3666 | 4574 | ||
3667 | cur = path->nodes[*level]; | 4575 | cur = path->nodes[*level]; |
3668 | ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, | 4576 | ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, |
3669 | &refs); | 4577 | &refs, &flags); |
3670 | BUG_ON(ret); | 4578 | BUG_ON(ret); |
3671 | if (refs > 1) | 4579 | if (refs > 1) |
3672 | goto out; | 4580 | goto out; |
3673 | 4581 | ||
4582 | BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); | ||
4583 | |||
3674 | while (*level >= 0) { | 4584 | while (*level >= 0) { |
3675 | cur = path->nodes[*level]; | 4585 | cur = path->nodes[*level]; |
3676 | if (*level == 0) { | 4586 | if (*level == 0) { |
@@ -3692,16 +4602,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, | |||
3692 | btrfs_tree_lock(next); | 4602 | btrfs_tree_lock(next); |
3693 | btrfs_set_lock_blocking(next); | 4603 | btrfs_set_lock_blocking(next); |
3694 | 4604 | ||
3695 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, | 4605 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, |
3696 | &refs); | 4606 | &refs, &flags); |
3697 | BUG_ON(ret); | 4607 | BUG_ON(ret); |
3698 | if (refs > 1) { | 4608 | if (refs > 1) { |
3699 | parent = path->nodes[*level]; | 4609 | parent = path->nodes[*level]; |
3700 | ret = btrfs_free_extent(trans, root, bytenr, | 4610 | ret = btrfs_free_extent(trans, root, bytenr, |
3701 | blocksize, parent->start, | 4611 | blocksize, parent->start, |
3702 | btrfs_header_owner(parent), | 4612 | btrfs_header_owner(parent), |
3703 | btrfs_header_generation(parent), | 4613 | *level - 1, 0); |
3704 | *level - 1, 1); | ||
3705 | BUG_ON(ret); | 4614 | BUG_ON(ret); |
3706 | path->slots[*level]++; | 4615 | path->slots[*level]++; |
3707 | btrfs_tree_unlock(next); | 4616 | btrfs_tree_unlock(next); |
@@ -3709,6 +4618,8 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, | |||
3709 | continue; | 4618 | continue; |
3710 | } | 4619 | } |
3711 | 4620 | ||
4621 | BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); | ||
4622 | |||
3712 | *level = btrfs_header_level(next); | 4623 | *level = btrfs_header_level(next); |
3713 | path->nodes[*level] = next; | 4624 | path->nodes[*level] = next; |
3714 | path->slots[*level] = 0; | 4625 | path->slots[*level] = 0; |
@@ -3716,13 +4627,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, | |||
3716 | cond_resched(); | 4627 | cond_resched(); |
3717 | } | 4628 | } |
3718 | out: | 4629 | out: |
3719 | parent = path->nodes[*level + 1]; | 4630 | if (path->nodes[*level] == root->node) |
4631 | parent = path->nodes[*level]; | ||
4632 | else | ||
4633 | parent = path->nodes[*level + 1]; | ||
3720 | bytenr = path->nodes[*level]->start; | 4634 | bytenr = path->nodes[*level]->start; |
3721 | blocksize = path->nodes[*level]->len; | 4635 | blocksize = path->nodes[*level]->len; |
3722 | 4636 | ||
3723 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | 4637 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, |
3724 | parent->start, btrfs_header_owner(parent), | 4638 | btrfs_header_owner(parent), *level, 0); |
3725 | btrfs_header_generation(parent), *level, 1); | ||
3726 | BUG_ON(ret); | 4639 | BUG_ON(ret); |
3727 | 4640 | ||
3728 | if (path->locks[*level]) { | 4641 | if (path->locks[*level]) { |
@@ -3746,8 +4659,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
3746 | struct btrfs_path *path, | 4659 | struct btrfs_path *path, |
3747 | int *level, int max_level) | 4660 | int *level, int max_level) |
3748 | { | 4661 | { |
3749 | u64 root_owner; | ||
3750 | u64 root_gen; | ||
3751 | struct btrfs_root_item *root_item = &root->root_item; | 4662 | struct btrfs_root_item *root_item = &root->root_item; |
3752 | int i; | 4663 | int i; |
3753 | int slot; | 4664 | int slot; |
@@ -3755,24 +4666,22 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
3755 | 4666 | ||
3756 | for (i = *level; i < max_level && path->nodes[i]; i++) { | 4667 | for (i = *level; i < max_level && path->nodes[i]; i++) { |
3757 | slot = path->slots[i]; | 4668 | slot = path->slots[i]; |
3758 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 4669 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { |
3759 | struct extent_buffer *node; | ||
3760 | struct btrfs_disk_key disk_key; | ||
3761 | |||
3762 | /* | 4670 | /* |
3763 | * there is more work to do in this level. | 4671 | * there is more work to do in this level. |
3764 | * Update the drop_progress marker to reflect | 4672 | * Update the drop_progress marker to reflect |
3765 | * the work we've done so far, and then bump | 4673 | * the work we've done so far, and then bump |
3766 | * the slot number | 4674 | * the slot number |
3767 | */ | 4675 | */ |
3768 | node = path->nodes[i]; | ||
3769 | path->slots[i]++; | 4676 | path->slots[i]++; |
3770 | *level = i; | ||
3771 | WARN_ON(*level == 0); | 4677 | WARN_ON(*level == 0); |
3772 | btrfs_node_key(node, &disk_key, path->slots[i]); | 4678 | if (max_level == BTRFS_MAX_LEVEL) { |
3773 | memcpy(&root_item->drop_progress, | 4679 | btrfs_node_key(path->nodes[i], |
3774 | &disk_key, sizeof(disk_key)); | 4680 | &root_item->drop_progress, |
3775 | root_item->drop_level = i; | 4681 | path->slots[i]); |
4682 | root_item->drop_level = i; | ||
4683 | } | ||
4684 | *level = i; | ||
3776 | return 0; | 4685 | return 0; |
3777 | } else { | 4686 | } else { |
3778 | struct extent_buffer *parent; | 4687 | struct extent_buffer *parent; |
@@ -3786,22 +4695,20 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
3786 | else | 4695 | else |
3787 | parent = path->nodes[*level + 1]; | 4696 | parent = path->nodes[*level + 1]; |
3788 | 4697 | ||
3789 | root_owner = btrfs_header_owner(parent); | 4698 | clean_tree_block(trans, root, path->nodes[i]); |
3790 | root_gen = btrfs_header_generation(parent); | ||
3791 | |||
3792 | clean_tree_block(trans, root, path->nodes[*level]); | ||
3793 | ret = btrfs_free_extent(trans, root, | 4699 | ret = btrfs_free_extent(trans, root, |
3794 | path->nodes[*level]->start, | 4700 | path->nodes[i]->start, |
3795 | path->nodes[*level]->len, | 4701 | path->nodes[i]->len, |
3796 | parent->start, root_owner, | 4702 | parent->start, |
3797 | root_gen, *level, 1); | 4703 | btrfs_header_owner(parent), |
4704 | *level, 0); | ||
3798 | BUG_ON(ret); | 4705 | BUG_ON(ret); |
3799 | if (path->locks[*level]) { | 4706 | if (path->locks[*level]) { |
3800 | btrfs_tree_unlock(path->nodes[*level]); | 4707 | btrfs_tree_unlock(path->nodes[i]); |
3801 | path->locks[*level] = 0; | 4708 | path->locks[i] = 0; |
3802 | } | 4709 | } |
3803 | free_extent_buffer(path->nodes[*level]); | 4710 | free_extent_buffer(path->nodes[i]); |
3804 | path->nodes[*level] = NULL; | 4711 | path->nodes[i] = NULL; |
3805 | *level = i + 1; | 4712 | *level = i + 1; |
3806 | } | 4713 | } |
3807 | } | 4714 | } |
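walk_up_tree() now records drop_progress only when called with max_level == BTRFS_MAX_LEVEL, i.e. during whole-snapshot deletion, so subtree drops no longer touch the root item. A sketch of how the marker is consumed when deletion resumes (mirroring the lookup in btrfs_drop_snapshot below):

static void resume_from_drop_progress(struct btrfs_root *root,
				      struct btrfs_key *key, int *level)
{
	struct btrfs_root_item *ri = &root->root_item;

	/* key of the last slot processed before the transaction ended */
	btrfs_disk_key_to_cpu(key, &ri->drop_progress);
	*level = ri->drop_level;
	/* caller searches down to *level and continues walking from *key */
}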
@@ -3820,21 +4727,18 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3820 | int wret; | 4727 | int wret; |
3821 | int level; | 4728 | int level; |
3822 | struct btrfs_path *path; | 4729 | struct btrfs_path *path; |
3823 | int i; | ||
3824 | int orig_level; | ||
3825 | int update_count; | 4730 | int update_count; |
3826 | struct btrfs_root_item *root_item = &root->root_item; | 4731 | struct btrfs_root_item *root_item = &root->root_item; |
3827 | 4732 | ||
3828 | WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); | ||
3829 | path = btrfs_alloc_path(); | 4733 | path = btrfs_alloc_path(); |
3830 | BUG_ON(!path); | 4734 | BUG_ON(!path); |
3831 | 4735 | ||
3832 | level = btrfs_header_level(root->node); | 4736 | level = btrfs_header_level(root->node); |
3833 | orig_level = level; | ||
3834 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 4737 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
3835 | path->nodes[level] = root->node; | 4738 | path->nodes[level] = btrfs_lock_root_node(root); |
3836 | extent_buffer_get(root->node); | 4739 | btrfs_set_lock_blocking(path->nodes[level]); |
3837 | path->slots[level] = 0; | 4740 | path->slots[level] = 0; |
4741 | path->locks[level] = 1; | ||
3838 | } else { | 4742 | } else { |
3839 | struct btrfs_key key; | 4743 | struct btrfs_key key; |
3840 | struct btrfs_disk_key found_key; | 4744 | struct btrfs_disk_key found_key; |
@@ -3856,12 +4760,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3856 | * unlock our path, this is safe because only this | 4760 | * unlock our path, this is safe because only this |
3857 | * function is allowed to delete this snapshot | 4761 | * function is allowed to delete this snapshot |
3858 | */ | 4762 | */ |
3859 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | 4763 | btrfs_unlock_up_safe(path, 0); |
3860 | if (path->nodes[i] && path->locks[i]) { | ||
3861 | path->locks[i] = 0; | ||
3862 | btrfs_tree_unlock(path->nodes[i]); | ||
3863 | } | ||
3864 | } | ||
3865 | } | 4764 | } |
3866 | while (1) { | 4765 | while (1) { |
3867 | unsigned long update; | 4766 | unsigned long update; |
@@ -3882,8 +4781,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3882 | ret = -EAGAIN; | 4781 | ret = -EAGAIN; |
3883 | break; | 4782 | break; |
3884 | } | 4783 | } |
3885 | atomic_inc(&root->fs_info->throttle_gen); | ||
3886 | wake_up(&root->fs_info->transaction_throttle); | ||
3887 | for (update_count = 0; update_count < 16; update_count++) { | 4784 | for (update_count = 0; update_count < 16; update_count++) { |
3888 | update = trans->delayed_ref_updates; | 4785 | update = trans->delayed_ref_updates; |
3889 | trans->delayed_ref_updates = 0; | 4786 | trans->delayed_ref_updates = 0; |
@@ -3893,12 +4790,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3893 | break; | 4790 | break; |
3894 | } | 4791 | } |
3895 | } | 4792 | } |
3896 | for (i = 0; i <= orig_level; i++) { | ||
3897 | if (path->nodes[i]) { | ||
3898 | free_extent_buffer(path->nodes[i]); | ||
3899 | path->nodes[i] = NULL; | ||
3900 | } | ||
3901 | } | ||
3902 | out: | 4793 | out: |
3903 | btrfs_free_path(path); | 4794 | btrfs_free_path(path); |
3904 | return ret; | 4795 | return ret; |
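btrfs_drop_snapshot() now takes a blocking lock on the root node before walking, and the open-coded unlock loop is replaced by btrfs_unlock_up_safe(). For reference, what that call replaces is roughly:

static void unlock_whole_path(struct btrfs_path *path)
{
	int i;

	/* drop every lock still held anywhere on the path */
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		if (path->nodes[i] && path->locks[i]) {
			path->locks[i] = 0;
			btrfs_tree_unlock(path->nodes[i]);
		}
	}
}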
@@ -3931,7 +4822,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
3931 | path->slots[level] = 0; | 4822 | path->slots[level] = 0; |
3932 | 4823 | ||
3933 | while (1) { | 4824 | while (1) { |
3934 | wret = walk_down_subtree(trans, root, path, &level); | 4825 | wret = walk_down_tree(trans, root, path, &level); |
3935 | if (wret < 0) | 4826 | if (wret < 0) |
3936 | ret = wret; | 4827 | ret = wret; |
3937 | if (wret != 0) | 4828 | if (wret != 0) |
@@ -3948,6 +4839,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
3948 | return ret; | 4839 | return ret; |
3949 | } | 4840 | } |
3950 | 4841 | ||
4842 | #if 0 | ||
3951 | static unsigned long calc_ra(unsigned long start, unsigned long last, | 4843 | static unsigned long calc_ra(unsigned long start, unsigned long last, |
3952 | unsigned long nr) | 4844 | unsigned long nr) |
3953 | { | 4845 | { |
@@ -5429,6 +6321,7 @@ out: | |||
5429 | kfree(ref_path); | 6321 | kfree(ref_path); |
5430 | return ret; | 6322 | return ret; |
5431 | } | 6323 | } |
6324 | #endif | ||
5432 | 6325 | ||
5433 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | 6326 | static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) |
5434 | { | 6327 | { |
@@ -5477,7 +6370,8 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root, | |||
5477 | u64 calc; | 6370 | u64 calc; |
5478 | 6371 | ||
5479 | spin_lock(&shrink_block_group->lock); | 6372 | spin_lock(&shrink_block_group->lock); |
5480 | if (btrfs_block_group_used(&shrink_block_group->item) > 0) { | 6373 | if (btrfs_block_group_used(&shrink_block_group->item) + |
6374 | shrink_block_group->reserved > 0) { | ||
5481 | spin_unlock(&shrink_block_group->lock); | 6375 | spin_unlock(&shrink_block_group->lock); |
5482 | 6376 | ||
5483 | trans = btrfs_start_transaction(root, 1); | 6377 | trans = btrfs_start_transaction(root, 1); |
@@ -5502,6 +6396,17 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root, | |||
5502 | return 0; | 6396 | return 0; |
5503 | } | 6397 | } |
5504 | 6398 | ||
6399 | |||
6400 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | ||
6401 | struct btrfs_block_group_cache *group) | ||
6402 | |||
6403 | { | ||
6404 | __alloc_chunk_for_shrink(root, group, 1); | ||
6405 | set_block_group_readonly(group); | ||
6406 | return 0; | ||
6407 | } | ||
6408 | |||
6409 | #if 0 | ||
5505 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 6410 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, |
5506 | struct btrfs_root *root, | 6411 | struct btrfs_root *root, |
5507 | u64 objectid, u64 size) | 6412 | u64 objectid, u64 size) |
@@ -5781,6 +6686,7 @@ out: | |||
5781 | btrfs_free_path(path); | 6686 | btrfs_free_path(path); |
5782 | return ret; | 6687 | return ret; |
5783 | } | 6688 | } |
6689 | #endif | ||
5784 | 6690 | ||
5785 | static int find_first_block_group(struct btrfs_root *root, | 6691 | static int find_first_block_group(struct btrfs_root *root, |
5786 | struct btrfs_path *path, struct btrfs_key *key) | 6692 | struct btrfs_path *path, struct btrfs_key *key) |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fe9eb990e443..68260180f587 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -476,6 +476,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
476 | struct extent_state *state; | 476 | struct extent_state *state; |
477 | struct extent_state *prealloc = NULL; | 477 | struct extent_state *prealloc = NULL; |
478 | struct rb_node *node; | 478 | struct rb_node *node; |
479 | u64 last_end; | ||
479 | int err; | 480 | int err; |
480 | int set = 0; | 481 | int set = 0; |
481 | 482 | ||
@@ -498,6 +499,7 @@ again: | |||
498 | if (state->start > end) | 499 | if (state->start > end) |
499 | goto out; | 500 | goto out; |
500 | WARN_ON(state->end < start); | 501 | WARN_ON(state->end < start); |
502 | last_end = state->end; | ||
501 | 503 | ||
502 | /* | 504 | /* |
503 | * | ---- desired range ---- | | 505 | * | ---- desired range ---- | |
@@ -524,9 +526,11 @@ again: | |||
524 | if (err) | 526 | if (err) |
525 | goto out; | 527 | goto out; |
526 | if (state->end <= end) { | 528 | if (state->end <= end) { |
527 | start = state->end + 1; | ||
528 | set |= clear_state_bit(tree, state, bits, | 529 | set |= clear_state_bit(tree, state, bits, |
529 | wake, delete); | 530 | wake, delete); |
531 | if (last_end == (u64)-1) | ||
532 | goto out; | ||
533 | start = last_end + 1; | ||
530 | } else { | 534 | } else { |
531 | start = state->start; | 535 | start = state->start; |
532 | } | 536 | } |
@@ -552,8 +556,10 @@ again: | |||
552 | goto out; | 556 | goto out; |
553 | } | 557 | } |
554 | 558 | ||
555 | start = state->end + 1; | ||
556 | set |= clear_state_bit(tree, state, bits, wake, delete); | 559 | set |= clear_state_bit(tree, state, bits, wake, delete); |
560 | if (last_end == (u64)-1) | ||
561 | goto out; | ||
562 | start = last_end + 1; | ||
557 | goto search_again; | 563 | goto search_again; |
558 | 564 | ||
559 | out: | 565 | out: |
@@ -707,8 +713,10 @@ again: | |||
707 | goto out; | 713 | goto out; |
708 | } | 714 | } |
709 | set_state_bits(tree, state, bits); | 715 | set_state_bits(tree, state, bits); |
710 | start = state->end + 1; | ||
711 | merge_state(tree, state); | 716 | merge_state(tree, state); |
717 | if (last_end == (u64)-1) | ||
718 | goto out; | ||
719 | start = last_end + 1; | ||
712 | goto search_again; | 720 | goto search_again; |
713 | } | 721 | } |
714 | 722 | ||
@@ -742,8 +750,10 @@ again: | |||
742 | goto out; | 750 | goto out; |
743 | if (state->end <= end) { | 751 | if (state->end <= end) { |
744 | set_state_bits(tree, state, bits); | 752 | set_state_bits(tree, state, bits); |
745 | start = state->end + 1; | ||
746 | merge_state(tree, state); | 753 | merge_state(tree, state); |
754 | if (last_end == (u64)-1) | ||
755 | goto out; | ||
756 | start = last_end + 1; | ||
747 | } else { | 757 | } else { |
748 | start = state->start; | 758 | start = state->start; |
749 | } | 759 | } |
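The extent_io.c hunks all fix the same latent bug: clear_state_bit() (and merge_state() on the set side) may free or merge the extent_state, so state->end must not be read after the call; and adding 1 to a range ending at (u64)-1 would wrap to zero and loop forever. The pattern, distilled — a fragment with the same locals as clear_extent_bit():

	u64 last_end = state->end;	/* cache before 'state' can vanish */

	set |= clear_state_bit(tree, state, bits, wake, delete);
	/* 'state' may now be freed or merged away -- do not touch it */
	if (last_end == (u64)-1)
		goto out;		/* +1 would wrap a u64 to 0 */
	start = last_end + 1;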
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1d51dc38bb49..126477eaecf5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -291,16 +291,12 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
291 | { | 291 | { |
292 | u64 extent_end = 0; | 292 | u64 extent_end = 0; |
293 | u64 search_start = start; | 293 | u64 search_start = start; |
294 | u64 leaf_start; | ||
295 | u64 ram_bytes = 0; | 294 | u64 ram_bytes = 0; |
296 | u64 orig_parent = 0; | ||
297 | u64 disk_bytenr = 0; | 295 | u64 disk_bytenr = 0; |
298 | u64 orig_locked_end = locked_end; | 296 | u64 orig_locked_end = locked_end; |
299 | u8 compression; | 297 | u8 compression; |
300 | u8 encryption; | 298 | u8 encryption; |
301 | u16 other_encoding = 0; | 299 | u16 other_encoding = 0; |
302 | u64 root_gen; | ||
303 | u64 root_owner; | ||
304 | struct extent_buffer *leaf; | 300 | struct extent_buffer *leaf; |
305 | struct btrfs_file_extent_item *extent; | 301 | struct btrfs_file_extent_item *extent; |
306 | struct btrfs_path *path; | 302 | struct btrfs_path *path; |
@@ -340,9 +336,6 @@ next_slot: | |||
340 | bookend = 0; | 336 | bookend = 0; |
341 | found_extent = 0; | 337 | found_extent = 0; |
342 | found_inline = 0; | 338 | found_inline = 0; |
343 | leaf_start = 0; | ||
344 | root_gen = 0; | ||
345 | root_owner = 0; | ||
346 | compression = 0; | 339 | compression = 0; |
347 | encryption = 0; | 340 | encryption = 0; |
348 | extent = NULL; | 341 | extent = NULL; |
@@ -417,9 +410,6 @@ next_slot: | |||
417 | if (found_extent) { | 410 | if (found_extent) { |
418 | read_extent_buffer(leaf, &old, (unsigned long)extent, | 411 | read_extent_buffer(leaf, &old, (unsigned long)extent, |
419 | sizeof(old)); | 412 | sizeof(old)); |
420 | root_gen = btrfs_header_generation(leaf); | ||
421 | root_owner = btrfs_header_owner(leaf); | ||
422 | leaf_start = leaf->start; | ||
423 | } | 413 | } |
424 | 414 | ||
425 | if (end < extent_end && end >= key.offset) { | 415 | if (end < extent_end && end >= key.offset) { |
@@ -443,14 +433,14 @@ next_slot: | |||
443 | } | 433 | } |
444 | locked_end = extent_end; | 434 | locked_end = extent_end; |
445 | } | 435 | } |
446 | orig_parent = path->nodes[0]->start; | ||
447 | disk_bytenr = le64_to_cpu(old.disk_bytenr); | 436 | disk_bytenr = le64_to_cpu(old.disk_bytenr); |
448 | if (disk_bytenr != 0) { | 437 | if (disk_bytenr != 0) { |
449 | ret = btrfs_inc_extent_ref(trans, root, | 438 | ret = btrfs_inc_extent_ref(trans, root, |
450 | disk_bytenr, | 439 | disk_bytenr, |
451 | le64_to_cpu(old.disk_num_bytes), | 440 | le64_to_cpu(old.disk_num_bytes), 0, |
452 | orig_parent, root->root_key.objectid, | 441 | root->root_key.objectid, |
453 | trans->transid, inode->i_ino); | 442 | key.objectid, key.offset - |
443 | le64_to_cpu(old.offset)); | ||
454 | BUG_ON(ret); | 444 | BUG_ON(ret); |
455 | } | 445 | } |
456 | } | 446 | } |
@@ -568,17 +558,6 @@ next_slot: | |||
568 | btrfs_mark_buffer_dirty(path->nodes[0]); | 558 | btrfs_mark_buffer_dirty(path->nodes[0]); |
569 | btrfs_set_lock_blocking(path->nodes[0]); | 559 | btrfs_set_lock_blocking(path->nodes[0]); |
570 | 560 | ||
571 | if (disk_bytenr != 0) { | ||
572 | ret = btrfs_update_extent_ref(trans, root, | ||
573 | disk_bytenr, | ||
574 | le64_to_cpu(old.disk_num_bytes), | ||
575 | orig_parent, | ||
576 | leaf->start, | ||
577 | root->root_key.objectid, | ||
578 | trans->transid, ins.objectid); | ||
579 | |||
580 | BUG_ON(ret); | ||
581 | } | ||
582 | path->leave_spinning = 0; | 561 | path->leave_spinning = 0; |
583 | btrfs_release_path(root, path); | 562 | btrfs_release_path(root, path); |
584 | if (disk_bytenr != 0) | 563 | if (disk_bytenr != 0) |
@@ -594,8 +573,9 @@ next_slot: | |||
594 | ret = btrfs_free_extent(trans, root, | 573 | ret = btrfs_free_extent(trans, root, |
595 | old_disk_bytenr, | 574 | old_disk_bytenr, |
596 | le64_to_cpu(old.disk_num_bytes), | 575 | le64_to_cpu(old.disk_num_bytes), |
597 | leaf_start, root_owner, | 576 | 0, root->root_key.objectid, |
598 | root_gen, key.objectid, 0); | 577 | key.objectid, key.offset - |
578 | le64_to_cpu(old.offset)); | ||
599 | BUG_ON(ret); | 579 | BUG_ON(ret); |
600 | *hint_byte = old_disk_bytenr; | 580 | *hint_byte = old_disk_bytenr; |
601 | } | 581 | } |
@@ -664,12 +644,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
664 | u64 bytenr; | 644 | u64 bytenr; |
665 | u64 num_bytes; | 645 | u64 num_bytes; |
666 | u64 extent_end; | 646 | u64 extent_end; |
667 | u64 extent_offset; | 647 | u64 orig_offset; |
668 | u64 other_start; | 648 | u64 other_start; |
669 | u64 other_end; | 649 | u64 other_end; |
670 | u64 split = start; | 650 | u64 split = start; |
671 | u64 locked_end = end; | 651 | u64 locked_end = end; |
672 | u64 orig_parent; | ||
673 | int extent_type; | 652 | int extent_type; |
674 | int split_end = 1; | 653 | int split_end = 1; |
675 | int ret; | 654 | int ret; |
@@ -703,7 +682,7 @@ again: | |||
703 | 682 | ||
704 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | 683 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
705 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | 684 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); |
706 | extent_offset = btrfs_file_extent_offset(leaf, fi); | 685 | orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); |
707 | 686 | ||
708 | if (key.offset == start) | 687 | if (key.offset == start) |
709 | split = end; | 688 | split = end; |
@@ -711,8 +690,6 @@ again: | |||
711 | if (key.offset == start && extent_end == end) { | 690 | if (key.offset == start && extent_end == end) { |
712 | int del_nr = 0; | 691 | int del_nr = 0; |
713 | int del_slot = 0; | 692 | int del_slot = 0; |
714 | u64 leaf_owner = btrfs_header_owner(leaf); | ||
715 | u64 leaf_gen = btrfs_header_generation(leaf); | ||
716 | other_start = end; | 693 | other_start = end; |
717 | other_end = 0; | 694 | other_end = 0; |
718 | if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, | 695 | if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, |
@@ -721,8 +698,8 @@ again: | |||
721 | del_slot = path->slots[0] + 1; | 698 | del_slot = path->slots[0] + 1; |
722 | del_nr++; | 699 | del_nr++; |
723 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 700 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
724 | leaf->start, leaf_owner, | 701 | 0, root->root_key.objectid, |
725 | leaf_gen, inode->i_ino, 0); | 702 | inode->i_ino, orig_offset); |
726 | BUG_ON(ret); | 703 | BUG_ON(ret); |
727 | } | 704 | } |
728 | other_start = 0; | 705 | other_start = 0; |
@@ -733,8 +710,8 @@ again: | |||
733 | del_slot = path->slots[0]; | 710 | del_slot = path->slots[0]; |
734 | del_nr++; | 711 | del_nr++; |
735 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 712 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
736 | leaf->start, leaf_owner, | 713 | 0, root->root_key.objectid, |
737 | leaf_gen, inode->i_ino, 0); | 714 | inode->i_ino, orig_offset); |
738 | BUG_ON(ret); | 715 | BUG_ON(ret); |
739 | } | 716 | } |
740 | split_end = 0; | 717 | split_end = 0; |
@@ -768,13 +745,12 @@ again: | |||
768 | locked_end = extent_end; | 745 | locked_end = extent_end; |
769 | } | 746 | } |
770 | btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); | 747 | btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); |
771 | extent_offset += split - key.offset; | ||
772 | } else { | 748 | } else { |
773 | BUG_ON(key.offset != start); | 749 | BUG_ON(key.offset != start); |
774 | btrfs_set_file_extent_offset(leaf, fi, extent_offset + | ||
775 | split - key.offset); | ||
776 | btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split); | ||
777 | key.offset = split; | 750 | key.offset = split; |
751 | btrfs_set_file_extent_offset(leaf, fi, key.offset - | ||
752 | orig_offset); | ||
753 | btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split); | ||
778 | btrfs_set_item_key_safe(trans, root, path, &key); | 754 | btrfs_set_item_key_safe(trans, root, path, &key); |
779 | extent_end = split; | 755 | extent_end = split; |
780 | } | 756 | } |
@@ -793,7 +769,8 @@ again: | |||
793 | struct btrfs_file_extent_item); | 769 | struct btrfs_file_extent_item); |
794 | key.offset = split; | 770 | key.offset = split; |
795 | btrfs_set_item_key_safe(trans, root, path, &key); | 771 | btrfs_set_item_key_safe(trans, root, path, &key); |
796 | btrfs_set_file_extent_offset(leaf, fi, extent_offset); | 772 | btrfs_set_file_extent_offset(leaf, fi, key.offset - |
773 | orig_offset); | ||
797 | btrfs_set_file_extent_num_bytes(leaf, fi, | 774 | btrfs_set_file_extent_num_bytes(leaf, fi, |
798 | other_end - split); | 775 | other_end - split); |
799 | goto done; | 776 | goto done; |
@@ -815,10 +792,9 @@ again: | |||
815 | 792 | ||
816 | btrfs_mark_buffer_dirty(leaf); | 793 | btrfs_mark_buffer_dirty(leaf); |
817 | 794 | ||
818 | orig_parent = leaf->start; | 795 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, |
819 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, | 796 | root->root_key.objectid, |
820 | orig_parent, root->root_key.objectid, | 797 | inode->i_ino, orig_offset); |
821 | trans->transid, inode->i_ino); | ||
822 | BUG_ON(ret); | 798 | BUG_ON(ret); |
823 | btrfs_release_path(root, path); | 799 | btrfs_release_path(root, path); |
824 | 800 | ||
@@ -833,20 +809,12 @@ again: | |||
833 | btrfs_set_file_extent_type(leaf, fi, extent_type); | 809 | btrfs_set_file_extent_type(leaf, fi, extent_type); |
834 | btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); | 810 | btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); |
835 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); | 811 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); |
836 | btrfs_set_file_extent_offset(leaf, fi, extent_offset); | 812 | btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset); |
837 | btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); | 813 | btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); |
838 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | 814 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); |
839 | btrfs_set_file_extent_compression(leaf, fi, 0); | 815 | btrfs_set_file_extent_compression(leaf, fi, 0); |
840 | btrfs_set_file_extent_encryption(leaf, fi, 0); | 816 | btrfs_set_file_extent_encryption(leaf, fi, 0); |
841 | btrfs_set_file_extent_other_encoding(leaf, fi, 0); | 817 | btrfs_set_file_extent_other_encoding(leaf, fi, 0); |
842 | |||
843 | if (orig_parent != leaf->start) { | ||
844 | ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes, | ||
845 | orig_parent, leaf->start, | ||
846 | root->root_key.objectid, | ||
847 | trans->transid, inode->i_ino); | ||
848 | BUG_ON(ret); | ||
849 | } | ||
850 | done: | 818 | done: |
851 | btrfs_mark_buffer_dirty(leaf); | 819 | btrfs_mark_buffer_dirty(leaf); |
852 | 820 | ||
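Throughout file.c the (parent, generation) backref arguments are replaced by the new data-backref tuple (root objectid, inode number, orig_offset), where orig_offset is the file position the extent would start at had it never been split. Every piece of a split extent computes the same value, so all pieces share one backref entry; a sketch:

static u64 file_extent_backref_offset(struct extent_buffer *leaf,
				      struct btrfs_key *key,
				      struct btrfs_file_extent_item *fi)
{
	/* identical for every slice of the same original extent */
	return key->offset - btrfs_file_extent_offset(leaf, fi);
}

With this keying, the backref no longer names the leaf holding the item, which is why both btrfs_update_extent_ref() call sites above are simply deleted rather than converted.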
@@ -1189,6 +1157,8 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1189 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1157 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
1190 | root->log_batch++; | 1158 | root->log_batch++; |
1191 | 1159 | ||
1160 | if (datasync && !(inode->i_state & I_DIRTY_PAGES)) | ||
1161 | goto out; | ||
1192 | /* | 1162 | /* |
1193 | * ok we haven't committed the transaction yet, let's do a commit | 1163 | * ok we haven't committed the transaction yet, let's do a commit |
1194 | */ | 1164 | */ |
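The two added lines give fdatasync() a fast path: when only non-data metadata (timestamps, for instance) has changed, the inode has no dirty pages and the tree-log commit can be skipped entirely. From userspace this is simply — a sketch, not a behavioural guarantee:

#include <unistd.h>

int flush_file_data(int fd)
{
	return fdatasync(fd);	/* pure-metadata syncs may skip the log commit */
}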
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0bc93657b460..4538e48581a5 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -579,6 +579,7 @@ out: | |||
579 | * it returns -enospc | 579 | * it returns -enospc |
580 | */ | 580 | */ |
581 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | 581 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, |
582 | struct btrfs_root *root, | ||
582 | struct btrfs_block_group_cache *block_group, | 583 | struct btrfs_block_group_cache *block_group, |
583 | struct btrfs_free_cluster *cluster, | 584 | struct btrfs_free_cluster *cluster, |
584 | u64 offset, u64 bytes, u64 empty_size) | 585 | u64 offset, u64 bytes, u64 empty_size) |
@@ -595,7 +596,9 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
595 | int ret; | 596 | int ret; |
596 | 597 | ||
597 | /* for metadata, allow allocations with more holes */ | 598 | /* for metadata, allow allocations with more holes */ |
598 | if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | 599 | if (btrfs_test_opt(root, SSD_SPREAD)) { |
600 | min_bytes = bytes + empty_size; | ||
601 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
599 | /* | 602 | /* |
600 | * we want to do larger allocations when we are | 603 | * we want to do larger allocations when we are |
601 | * flushing out the delayed refs, it helps prevent | 604 | * flushing out the delayed refs, it helps prevent |
@@ -645,14 +648,15 @@ again: | |||
645 | * we haven't filled the empty size and the window is | 648 | * we haven't filled the empty size and the window is |
646 | * very large. reset and try again | 649 | * very large. reset and try again |
647 | */ | 650 | */ |
648 | if (next->offset - window_start > (bytes + empty_size) * 2) { | 651 | if (next->offset - (last->offset + last->bytes) > 128 * 1024 || |
652 | next->offset - window_start > (bytes + empty_size) * 2) { | ||
649 | entry = next; | 653 | entry = next; |
650 | window_start = entry->offset; | 654 | window_start = entry->offset; |
651 | window_free = entry->bytes; | 655 | window_free = entry->bytes; |
652 | last = entry; | 656 | last = entry; |
653 | max_extent = 0; | 657 | max_extent = 0; |
654 | total_retries++; | 658 | total_retries++; |
655 | if (total_retries % 256 == 0) { | 659 | if (total_retries % 64 == 0) { |
656 | if (min_bytes >= (bytes + empty_size)) { | 660 | if (min_bytes >= (bytes + empty_size)) { |
657 | ret = -ENOSPC; | 661 | ret = -ENOSPC; |
658 | goto out; | 662 | goto out; |
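Two cluster heuristics change here: with the new SSD_SPREAD mount option min_bytes covers the whole request, forcing a fully contiguous cluster, and a candidate window is now abandoned early when free-space entries sit more than 128K apart. The window test, distilled (helper name assumed; last_end is last->offset + last->bytes, needed is bytes + empty_size):

static bool window_too_sparse(u64 next_offset, u64 last_end,
			      u64 window_start, u64 needed)
{
	/* a large gap between free entries poisons the whole window */
	if (next_offset - last_end > 128 * 1024)
		return true;
	/* so does a window more than twice the size we actually need */
	return next_offset - window_start > needed * 2;
}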
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index ab0bdc0a63ce..266fb8764054 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -31,6 +31,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
31 | u64 bytes); | 31 | u64 bytes); |
32 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | 32 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); |
33 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | 33 | int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, |
34 | struct btrfs_root *root, | ||
34 | struct btrfs_block_group_cache *block_group, | 35 | struct btrfs_block_group_cache *block_group, |
35 | struct btrfs_free_cluster *cluster, | 36 | struct btrfs_free_cluster *cluster, |
36 | u64 offset, u64 bytes, u64 empty_size); | 37 | u64 offset, u64 bytes, u64 empty_size); |
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index 2a020b276768..db2ff9773b99 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h | |||
@@ -19,9 +19,9 @@ | |||
19 | #ifndef __HASH__ | 19 | #ifndef __HASH__ |
20 | #define __HASH__ | 20 | #define __HASH__ |
21 | 21 | ||
22 | #include "crc32c.h" | 22 | #include <linux/crc32c.h> |
23 | static inline u64 btrfs_name_hash(const char *name, int len) | 23 | static inline u64 btrfs_name_hash(const char *name, int len) |
24 | { | 24 | { |
25 | return btrfs_crc32c((u32)~1, name, len); | 25 | return crc32c((u32)~1, name, len); |
26 | } | 26 | } |
27 | #endif | 27 | #endif |
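btrfs_name_hash() switches from the private btrfs_crc32c() to the kernel-wide crc32c library; the seed (u32)~1 is kept so existing on-disk directory hashes stay stable. Written out as a self-contained example (the name is illustrative):

#include <linux/crc32c.h>

static u64 name_hash_example(void)
{
	const char name[] = "example";

	/* same seed as btrfs_name_hash(): ~1, not 0 or ~0 */
	return crc32c((u32)~1, name, sizeof(name) - 1);
}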
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1c8b0190d031..8612b3a09811 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -48,7 +48,6 @@ | |||
48 | #include "ordered-data.h" | 48 | #include "ordered-data.h" |
49 | #include "xattr.h" | 49 | #include "xattr.h" |
50 | #include "tree-log.h" | 50 | #include "tree-log.h" |
51 | #include "ref-cache.h" | ||
52 | #include "compression.h" | 51 | #include "compression.h" |
53 | #include "locking.h" | 52 | #include "locking.h" |
54 | 53 | ||
@@ -369,7 +368,7 @@ again: | |||
369 | * inode has not been flagged as nocompress. This flag can | 368 | * inode has not been flagged as nocompress. This flag can |
370 | * change at any time if we discover bad compression ratios. | 369 | * change at any time if we discover bad compression ratios. |
371 | */ | 370 | */ |
372 | if (!btrfs_test_flag(inode, NOCOMPRESS) && | 371 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && |
373 | btrfs_test_opt(root, COMPRESS)) { | 372 | btrfs_test_opt(root, COMPRESS)) { |
374 | WARN_ON(pages); | 373 | WARN_ON(pages); |
375 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 374 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
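The btrfs_test_flag()/btrfs_set_flag() macros are retired across inode.c in favour of open-coded tests on BTRFS_I(inode)->flags. The idiom, as a sketch of an equivalent helper (not part of the patch):

static inline bool btrfs_inode_nocompress(struct inode *inode)
{
	return BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS;
}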
@@ -470,7 +469,7 @@ again: | |||
470 | nr_pages_ret = 0; | 469 | nr_pages_ret = 0; |
471 | 470 | ||
472 | /* flag the file so we don't compress in the future */ | 471 | /* flag the file so we don't compress in the future */ |
473 | btrfs_set_flag(inode, NOCOMPRESS); | 472 | BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; |
474 | } | 473 | } |
475 | if (will_compress) { | 474 | if (will_compress) { |
476 | *num_added += 1; | 475 | *num_added += 1; |
@@ -863,7 +862,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
863 | async_cow->locked_page = locked_page; | 862 | async_cow->locked_page = locked_page; |
864 | async_cow->start = start; | 863 | async_cow->start = start; |
865 | 864 | ||
866 | if (btrfs_test_flag(inode, NOCOMPRESS)) | 865 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) |
867 | cur_end = end; | 866 | cur_end = end; |
868 | else | 867 | else |
869 | cur_end = min(end, start + 512 * 1024 - 1); | 868 | cur_end = min(end, start + 512 * 1024 - 1); |
@@ -944,6 +943,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
944 | u64 cow_start; | 943 | u64 cow_start; |
945 | u64 cur_offset; | 944 | u64 cur_offset; |
946 | u64 extent_end; | 945 | u64 extent_end; |
946 | u64 extent_offset; | ||
947 | u64 disk_bytenr; | 947 | u64 disk_bytenr; |
948 | u64 num_bytes; | 948 | u64 num_bytes; |
949 | int extent_type; | 949 | int extent_type; |
@@ -1005,6 +1005,7 @@ next_slot: | |||
1005 | if (extent_type == BTRFS_FILE_EXTENT_REG || | 1005 | if (extent_type == BTRFS_FILE_EXTENT_REG || |
1006 | extent_type == BTRFS_FILE_EXTENT_PREALLOC) { | 1006 | extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
1007 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | 1007 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
1008 | extent_offset = btrfs_file_extent_offset(leaf, fi); | ||
1008 | extent_end = found_key.offset + | 1009 | extent_end = found_key.offset + |
1009 | btrfs_file_extent_num_bytes(leaf, fi); | 1010 | btrfs_file_extent_num_bytes(leaf, fi); |
1010 | if (extent_end <= start) { | 1011 | if (extent_end <= start) { |
@@ -1022,9 +1023,10 @@ next_slot: | |||
1022 | if (btrfs_extent_readonly(root, disk_bytenr)) | 1023 | if (btrfs_extent_readonly(root, disk_bytenr)) |
1023 | goto out_check; | 1024 | goto out_check; |
1024 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | 1025 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, |
1025 | disk_bytenr)) | 1026 | found_key.offset - |
1027 | extent_offset, disk_bytenr)) | ||
1026 | goto out_check; | 1028 | goto out_check; |
1027 | disk_bytenr += btrfs_file_extent_offset(leaf, fi); | 1029 | disk_bytenr += extent_offset; |
1028 | disk_bytenr += cur_offset - found_key.offset; | 1030 | disk_bytenr += cur_offset - found_key.offset; |
1029 | num_bytes = min(end + 1, extent_end) - cur_offset; | 1031 | num_bytes = min(end + 1, extent_end) - cur_offset; |
1030 | /* | 1032 | /* |
@@ -1131,10 +1133,10 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1131 | int ret; | 1133 | int ret; |
1132 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1134 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1133 | 1135 | ||
1134 | if (btrfs_test_flag(inode, NODATACOW)) | 1136 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) |
1135 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1137 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1136 | page_started, 1, nr_written); | 1138 | page_started, 1, nr_written); |
1137 | else if (btrfs_test_flag(inode, PREALLOC)) | 1139 | else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) |
1138 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1140 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1139 | page_started, 0, nr_written); | 1141 | page_started, 0, nr_written); |
1140 | else if (!btrfs_test_opt(root, COMPRESS)) | 1142 | else if (!btrfs_test_opt(root, COMPRESS)) |
@@ -1288,7 +1290,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1288 | int ret = 0; | 1290 | int ret = 0; |
1289 | int skip_sum; | 1291 | int skip_sum; |
1290 | 1292 | ||
1291 | skip_sum = btrfs_test_flag(inode, NODATASUM); | 1293 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
1292 | 1294 | ||
1293 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1295 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
1294 | BUG_ON(ret); | 1296 | BUG_ON(ret); |
@@ -1489,9 +1491,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1489 | ins.objectid = disk_bytenr; | 1491 | ins.objectid = disk_bytenr; |
1490 | ins.offset = disk_num_bytes; | 1492 | ins.offset = disk_num_bytes; |
1491 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 1493 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
1492 | ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, | 1494 | ret = btrfs_alloc_reserved_file_extent(trans, root, |
1493 | root->root_key.objectid, | 1495 | root->root_key.objectid, |
1494 | trans->transid, inode->i_ino, &ins); | 1496 | inode->i_ino, file_pos, &ins); |
1495 | BUG_ON(ret); | 1497 | BUG_ON(ret); |
1496 | btrfs_free_path(path); | 1498 | btrfs_free_path(path); |
1497 | 1499 | ||
@@ -1788,7 +1790,8 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
1788 | ClearPageChecked(page); | 1790 | ClearPageChecked(page); |
1789 | goto good; | 1791 | goto good; |
1790 | } | 1792 | } |
1791 | if (btrfs_test_flag(inode, NODATASUM)) | 1793 | |
1794 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) | ||
1792 | return 0; | 1795 | return 0; |
1793 | 1796 | ||
1794 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 1797 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
@@ -1956,23 +1959,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
1956 | * crossing root thing. we store the inode number in the | 1959 | * crossing root thing. we store the inode number in the |
1957 | * offset of the orphan item. | 1960 | * offset of the orphan item. |
1958 | */ | 1961 | */ |
1959 | inode = btrfs_iget_locked(root->fs_info->sb, | 1962 | found_key.objectid = found_key.offset; |
1960 | found_key.offset, root); | 1963 | found_key.type = BTRFS_INODE_ITEM_KEY; |
1961 | if (!inode) | 1964 | found_key.offset = 0; |
1965 | inode = btrfs_iget(root->fs_info->sb, &found_key, root); | ||
1966 | if (IS_ERR(inode)) | ||
1962 | break; | 1967 | break; |
1963 | 1968 | ||
1964 | if (inode->i_state & I_NEW) { | ||
1965 | BTRFS_I(inode)->root = root; | ||
1966 | |||
1967 | /* have to set the location manually */ | ||
1968 | BTRFS_I(inode)->location.objectid = inode->i_ino; | ||
1969 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
1970 | BTRFS_I(inode)->location.offset = 0; | ||
1971 | |||
1972 | btrfs_read_locked_inode(inode); | ||
1973 | unlock_new_inode(inode); | ||
1974 | } | ||
1975 | |||
1976 | /* | 1969 | /* |
1977 | * add this inode to the orphan list so btrfs_orphan_del does | 1970 | * add this inode to the orphan list so btrfs_orphan_del does |
1978 | * the proper thing when we hit it | 1971 | * the proper thing when we hit it |
@@ -2069,7 +2062,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, | |||
2069 | /* | 2062 | /* |
2070 | * read an inode from the btree into the in-memory inode | 2063 | * read an inode from the btree into the in-memory inode |
2071 | */ | 2064 | */ |
2072 | void btrfs_read_locked_inode(struct inode *inode) | 2065 | static void btrfs_read_locked_inode(struct inode *inode) |
2073 | { | 2066 | { |
2074 | struct btrfs_path *path; | 2067 | struct btrfs_path *path; |
2075 | struct extent_buffer *leaf; | 2068 | struct extent_buffer *leaf; |
@@ -2164,6 +2157,8 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
2164 | init_special_inode(inode, inode->i_mode, rdev); | 2157 | init_special_inode(inode, inode->i_mode, rdev); |
2165 | break; | 2158 | break; |
2166 | } | 2159 | } |
2160 | |||
2161 | btrfs_update_iflags(inode); | ||
2167 | return; | 2162 | return; |
2168 | 2163 | ||
2169 | make_bad: | 2164 | make_bad: |
@@ -2327,7 +2322,6 @@ err: | |||
2327 | btrfs_update_inode(trans, root, dir); | 2322 | btrfs_update_inode(trans, root, dir); |
2328 | btrfs_drop_nlink(inode); | 2323 | btrfs_drop_nlink(inode); |
2329 | ret = btrfs_update_inode(trans, root, inode); | 2324 | ret = btrfs_update_inode(trans, root, inode); |
2330 | dir->i_sb->s_dirt = 1; | ||
2331 | out: | 2325 | out: |
2332 | return ret; | 2326 | return ret; |
2333 | } | 2327 | } |
@@ -2599,9 +2593,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2599 | struct btrfs_file_extent_item *fi; | 2593 | struct btrfs_file_extent_item *fi; |
2600 | u64 extent_start = 0; | 2594 | u64 extent_start = 0; |
2601 | u64 extent_num_bytes = 0; | 2595 | u64 extent_num_bytes = 0; |
2596 | u64 extent_offset = 0; | ||
2602 | u64 item_end = 0; | 2597 | u64 item_end = 0; |
2603 | u64 root_gen = 0; | ||
2604 | u64 root_owner = 0; | ||
2605 | int found_extent; | 2598 | int found_extent; |
2606 | int del_item; | 2599 | int del_item; |
2607 | int pending_del_nr = 0; | 2600 | int pending_del_nr = 0; |
@@ -2716,6 +2709,9 @@ search_again: | |||
2716 | extent_num_bytes = | 2709 | extent_num_bytes = |
2717 | btrfs_file_extent_disk_num_bytes(leaf, | 2710 | btrfs_file_extent_disk_num_bytes(leaf, |
2718 | fi); | 2711 | fi); |
2712 | extent_offset = found_key.offset - | ||
2713 | btrfs_file_extent_offset(leaf, fi); | ||
2714 | |||
2719 | /* FIXME blocksize != 4096 */ | 2715 | /* FIXME blocksize != 4096 */ |
2720 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); | 2716 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); |
2721 | if (extent_start != 0) { | 2717 | if (extent_start != 0) { |
@@ -2723,8 +2719,6 @@ search_again: | |||
2723 | if (root->ref_cows) | 2719 | if (root->ref_cows) |
2724 | inode_sub_bytes(inode, num_dec); | 2720 | inode_sub_bytes(inode, num_dec); |
2725 | } | 2721 | } |
2726 | root_gen = btrfs_header_generation(leaf); | ||
2727 | root_owner = btrfs_header_owner(leaf); | ||
2728 | } | 2722 | } |
2729 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 2723 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
2730 | /* | 2724 | /* |
@@ -2768,12 +2762,12 @@ delete: | |||
2768 | } else { | 2762 | } else { |
2769 | break; | 2763 | break; |
2770 | } | 2764 | } |
2771 | if (found_extent) { | 2765 | if (found_extent && root->ref_cows) { |
2772 | btrfs_set_path_blocking(path); | 2766 | btrfs_set_path_blocking(path); |
2773 | ret = btrfs_free_extent(trans, root, extent_start, | 2767 | ret = btrfs_free_extent(trans, root, extent_start, |
2774 | extent_num_bytes, | 2768 | extent_num_bytes, 0, |
2775 | leaf->start, root_owner, | 2769 | btrfs_header_owner(leaf), |
2776 | root_gen, inode->i_ino, 0); | 2770 | inode->i_ino, extent_offset); |
2777 | BUG_ON(ret); | 2771 | BUG_ON(ret); |
2778 | } | 2772 | } |
2779 | next: | 2773 | next: |
@@ -2811,7 +2805,6 @@ error: | |||
2811 | pending_del_nr); | 2805 | pending_del_nr); |
2812 | } | 2806 | } |
2813 | btrfs_free_path(path); | 2807 | btrfs_free_path(path); |
2814 | inode->i_sb->s_dirt = 1; | ||
2815 | return ret; | 2808 | return ret; |
2816 | } | 2809 | } |
2817 | 2810 | ||
@@ -3105,6 +3098,45 @@ static int fixup_tree_root_location(struct btrfs_root *root, | |||
3105 | return 0; | 3098 | return 0; |
3106 | } | 3099 | } |
3107 | 3100 | ||
3101 | static void inode_tree_add(struct inode *inode) | ||
3102 | { | ||
3103 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3104 | struct btrfs_inode *entry; | ||
3105 | struct rb_node **p = &root->inode_tree.rb_node; | ||
3106 | struct rb_node *parent = NULL; | ||
3107 | |||
3108 | spin_lock(&root->inode_lock); | ||
3109 | while (*p) { | ||
3110 | parent = *p; | ||
3111 | entry = rb_entry(parent, struct btrfs_inode, rb_node); | ||
3112 | |||
3113 | if (inode->i_ino < entry->vfs_inode.i_ino) | ||
3114 | p = &(*p)->rb_left; | ||
3115 | else if (inode->i_ino > entry->vfs_inode.i_ino) | ||
3116 | p = &(*p)->rb_right; | ||
3117 | else { | ||
3118 | WARN_ON(!(entry->vfs_inode.i_state & | ||
3119 | (I_WILL_FREE | I_FREEING | I_CLEAR))); | ||
3120 | break; | ||
3121 | } | ||
3122 | } | ||
3123 | rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); | ||
3124 | rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree); | ||
3125 | spin_unlock(&root->inode_lock); | ||
3126 | } | ||
3127 | |||
3128 | static void inode_tree_del(struct inode *inode) | ||
3129 | { | ||
3130 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3131 | |||
3132 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { | ||
3133 | spin_lock(&root->inode_lock); | ||
3134 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); | ||
3135 | spin_unlock(&root->inode_lock); | ||
3136 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||
3137 | } | ||
3138 | } | ||
3139 | |||
3108 | static noinline void init_btrfs_i(struct inode *inode) | 3140 | static noinline void init_btrfs_i(struct inode *inode) |
3109 | { | 3141 | { |
3110 | struct btrfs_inode *bi = BTRFS_I(inode); | 3142 | struct btrfs_inode *bi = BTRFS_I(inode); |
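inode_tree_add()/inode_tree_del() above maintain a per-root red-black tree of in-memory inodes keyed strictly by inode number, protected by root->inode_lock. Only insertion and removal appear in this hunk; a lookup over the same tree would look like this (hypothetical helper, caller holding root->inode_lock):

static struct btrfs_inode *inode_tree_search(struct btrfs_root *root,
					     u64 ino)
{
	struct rb_node *n = root->inode_tree.rb_node;
	struct btrfs_inode *entry;

	while (n) {
		entry = rb_entry(n, struct btrfs_inode, rb_node);
		if (ino < entry->vfs_inode.i_ino)
			n = n->rb_left;
		else if (ino > entry->vfs_inode.i_ino)
			n = n->rb_right;
		else
			return entry;
	}
	return NULL;
}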
@@ -3130,6 +3162,7 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
3130 | inode->i_mapping, GFP_NOFS); | 3162 | inode->i_mapping, GFP_NOFS); |
3131 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | 3163 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); |
3132 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | 3164 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); |
3165 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||
3133 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | 3166 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); |
3134 | mutex_init(&BTRFS_I(inode)->extent_mutex); | 3167 | mutex_init(&BTRFS_I(inode)->extent_mutex); |
3135 | mutex_init(&BTRFS_I(inode)->log_mutex); | 3168 | mutex_init(&BTRFS_I(inode)->log_mutex); |
@@ -3152,26 +3185,9 @@ static int btrfs_find_actor(struct inode *inode, void *opaque) | |||
3152 | args->root == BTRFS_I(inode)->root; | 3185 | args->root == BTRFS_I(inode)->root; |
3153 | } | 3186 | } |
3154 | 3187 | ||
3155 | struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, | 3188 | static struct inode *btrfs_iget_locked(struct super_block *s, |
3156 | struct btrfs_root *root, int wait) | 3189 | u64 objectid, |
3157 | { | 3190 | struct btrfs_root *root) |
3158 | struct inode *inode; | ||
3159 | struct btrfs_iget_args args; | ||
3160 | args.ino = objectid; | ||
3161 | args.root = root; | ||
3162 | |||
3163 | if (wait) { | ||
3164 | inode = ilookup5(s, objectid, btrfs_find_actor, | ||
3165 | (void *)&args); | ||
3166 | } else { | ||
3167 | inode = ilookup5_nowait(s, objectid, btrfs_find_actor, | ||
3168 | (void *)&args); | ||
3169 | } | ||
3170 | return inode; | ||
3171 | } | ||
3172 | |||
3173 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, | ||
3174 | struct btrfs_root *root) | ||
3175 | { | 3191 | { |
3176 | struct inode *inode; | 3192 | struct inode *inode; |
3177 | struct btrfs_iget_args args; | 3193 | struct btrfs_iget_args args; |
@@ -3188,24 +3204,21 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, | |||
3188 | * Returns in *is_new if the inode was read from disk | 3204 | * Returns in *is_new if the inode was read from disk |
3189 | */ | 3205 | */ |
3190 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | 3206 | struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, |
3191 | struct btrfs_root *root, int *is_new) | 3207 | struct btrfs_root *root) |
3192 | { | 3208 | { |
3193 | struct inode *inode; | 3209 | struct inode *inode; |
3194 | 3210 | ||
3195 | inode = btrfs_iget_locked(s, location->objectid, root); | 3211 | inode = btrfs_iget_locked(s, location->objectid, root); |
3196 | if (!inode) | 3212 | if (!inode) |
3197 | return ERR_PTR(-EACCES); | 3213 | return ERR_PTR(-ENOMEM); |
3198 | 3214 | ||
3199 | if (inode->i_state & I_NEW) { | 3215 | if (inode->i_state & I_NEW) { |
3200 | BTRFS_I(inode)->root = root; | 3216 | BTRFS_I(inode)->root = root; |
3201 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | 3217 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); |
3202 | btrfs_read_locked_inode(inode); | 3218 | btrfs_read_locked_inode(inode); |
3219 | |||
3220 | inode_tree_add(inode); | ||
3203 | unlock_new_inode(inode); | 3221 | unlock_new_inode(inode); |
3204 | if (is_new) | ||
3205 | *is_new = 1; | ||
3206 | } else { | ||
3207 | if (is_new) | ||
3208 | *is_new = 0; | ||
3209 | } | 3222 | } |
3210 | 3223 | ||
3211 | return inode; | 3224 | return inode; |
@@ -3218,7 +3231,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3218 | struct btrfs_root *root = bi->root; | 3231 | struct btrfs_root *root = bi->root; |
3219 | struct btrfs_root *sub_root = root; | 3232 | struct btrfs_root *sub_root = root; |
3220 | struct btrfs_key location; | 3233 | struct btrfs_key location; |
3221 | int ret, new; | 3234 | int ret; |
3222 | 3235 | ||
3223 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 3236 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
3224 | return ERR_PTR(-ENAMETOOLONG); | 3237 | return ERR_PTR(-ENAMETOOLONG); |
@@ -3236,7 +3249,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3236 | return ERR_PTR(ret); | 3249 | return ERR_PTR(ret); |
3237 | if (ret > 0) | 3250 | if (ret > 0) |
3238 | return ERR_PTR(-ENOENT); | 3251 | return ERR_PTR(-ENOENT); |
3239 | inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); | 3252 | inode = btrfs_iget(dir->i_sb, &location, sub_root); |
3240 | if (IS_ERR(inode)) | 3253 | if (IS_ERR(inode)) |
3241 | return ERR_CAST(inode); | 3254 | return ERR_CAST(inode); |
3242 | } | 3255 | } |
@@ -3574,9 +3587,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3574 | btrfs_find_block_group(root, 0, alloc_hint, owner); | 3587 | btrfs_find_block_group(root, 0, alloc_hint, owner); |
3575 | if ((mode & S_IFREG)) { | 3588 | if ((mode & S_IFREG)) { |
3576 | if (btrfs_test_opt(root, NODATASUM)) | 3589 | if (btrfs_test_opt(root, NODATASUM)) |
3577 | btrfs_set_flag(inode, NODATASUM); | 3590 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
3578 | if (btrfs_test_opt(root, NODATACOW)) | 3591 | if (btrfs_test_opt(root, NODATACOW)) |
3579 | btrfs_set_flag(inode, NODATACOW); | 3592 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
3580 | } | 3593 | } |
3581 | 3594 | ||
3582 | key[0].objectid = objectid; | 3595 | key[0].objectid = objectid; |
@@ -3630,7 +3643,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3630 | location->offset = 0; | 3643 | location->offset = 0; |
3631 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | 3644 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); |
3632 | 3645 | ||
3646 | btrfs_inherit_iflags(inode, dir); | ||
3647 | |||
3633 | insert_inode_hash(inode); | 3648 | insert_inode_hash(inode); |
3649 | inode_tree_add(inode); | ||
3634 | return inode; | 3650 | return inode; |
3635 | fail: | 3651 | fail: |
3636 | if (dir) | 3652 | if (dir) |
@@ -3750,7 +3766,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
3750 | init_special_inode(inode, inode->i_mode, rdev); | 3766 | init_special_inode(inode, inode->i_mode, rdev); |
3751 | btrfs_update_inode(trans, root, inode); | 3767 | btrfs_update_inode(trans, root, inode); |
3752 | } | 3768 | } |
3753 | dir->i_sb->s_dirt = 1; | ||
3754 | btrfs_update_inode_block_group(trans, inode); | 3769 | btrfs_update_inode_block_group(trans, inode); |
3755 | btrfs_update_inode_block_group(trans, dir); | 3770 | btrfs_update_inode_block_group(trans, dir); |
3756 | out_unlock: | 3771 | out_unlock: |
@@ -3815,7 +3830,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
3815 | inode->i_op = &btrfs_file_inode_operations; | 3830 | inode->i_op = &btrfs_file_inode_operations; |
3816 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 3831 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
3817 | } | 3832 | } |
3818 | dir->i_sb->s_dirt = 1; | ||
3819 | btrfs_update_inode_block_group(trans, inode); | 3833 | btrfs_update_inode_block_group(trans, inode); |
3820 | btrfs_update_inode_block_group(trans, dir); | 3834 | btrfs_update_inode_block_group(trans, dir); |
3821 | out_unlock: | 3835 | out_unlock: |
@@ -3862,7 +3876,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
3862 | if (err) | 3876 | if (err) |
3863 | drop_inode = 1; | 3877 | drop_inode = 1; |
3864 | 3878 | ||
3865 | dir->i_sb->s_dirt = 1; | ||
3866 | btrfs_update_inode_block_group(trans, dir); | 3879 | btrfs_update_inode_block_group(trans, dir); |
3867 | err = btrfs_update_inode(trans, root, inode); | 3880 | err = btrfs_update_inode(trans, root, inode); |
3868 | 3881 | ||
@@ -3944,7 +3957,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
3944 | 3957 | ||
3945 | d_instantiate(dentry, inode); | 3958 | d_instantiate(dentry, inode); |
3946 | drop_on_err = 0; | 3959 | drop_on_err = 0; |
3947 | dir->i_sb->s_dirt = 1; | ||
3948 | btrfs_update_inode_block_group(trans, inode); | 3960 | btrfs_update_inode_block_group(trans, inode); |
3949 | btrfs_update_inode_block_group(trans, dir); | 3961 | btrfs_update_inode_block_group(trans, dir); |
3950 | 3962 | ||
@@ -4683,6 +4695,7 @@ void btrfs_destroy_inode(struct inode *inode) | |||
4683 | btrfs_put_ordered_extent(ordered); | 4695 | btrfs_put_ordered_extent(ordered); |
4684 | } | 4696 | } |
4685 | } | 4697 | } |
4698 | inode_tree_del(inode); | ||
4686 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); | 4699 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); |
4687 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 4700 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); |
4688 | } | 4701 | } |
@@ -4972,7 +4985,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
4972 | inode->i_op = &btrfs_file_inode_operations; | 4985 | inode->i_op = &btrfs_file_inode_operations; |
4973 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 4986 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
4974 | } | 4987 | } |
4975 | dir->i_sb->s_dirt = 1; | ||
4976 | btrfs_update_inode_block_group(trans, inode); | 4988 | btrfs_update_inode_block_group(trans, inode); |
4977 | btrfs_update_inode_block_group(trans, dir); | 4989 | btrfs_update_inode_block_group(trans, dir); |
4978 | if (drop_inode) | 4990 | if (drop_inode) |
@@ -5061,7 +5073,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
5061 | out: | 5073 | out: |
5062 | if (cur_offset > start) { | 5074 | if (cur_offset > start) { |
5063 | inode->i_ctime = CURRENT_TIME; | 5075 | inode->i_ctime = CURRENT_TIME; |
5064 | btrfs_set_flag(inode, PREALLOC); | 5076 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
5065 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 5077 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
5066 | cur_offset > i_size_read(inode)) | 5078 | cur_offset > i_size_read(inode)) |
5067 | btrfs_i_size_write(inode, cur_offset); | 5079 | btrfs_i_size_write(inode, cur_offset); |
@@ -5182,7 +5194,7 @@ static int btrfs_set_page_dirty(struct page *page) | |||
5182 | 5194 | ||
5183 | static int btrfs_permission(struct inode *inode, int mask) | 5195 | static int btrfs_permission(struct inode *inode, int mask) |
5184 | { | 5196 | { |
5185 | if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) | 5197 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
5186 | return -EACCES; | 5198 | return -EACCES; |
5187 | return generic_permission(inode, mask, btrfs_check_acl); | 5199 | return generic_permission(inode, mask, btrfs_check_acl); |
5188 | } | 5200 | } |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2624b53ea783..eff18f5b5362 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -50,7 +50,177 @@ | |||
50 | #include "volumes.h" | 50 | #include "volumes.h" |
51 | #include "locking.h" | 51 | #include "locking.h" |
52 | 52 | ||
53 | /* Mask out flags that are inappropriate for the given type of inode. */ | ||
54 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) | ||
55 | { | ||
56 | if (S_ISDIR(mode)) | ||
57 | return flags; | ||
58 | else if (S_ISREG(mode)) | ||
59 | return flags & ~FS_DIRSYNC_FL; | ||
60 | else | ||
61 | return flags & (FS_NODUMP_FL | FS_NOATIME_FL); | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. | ||
66 | */ | ||
67 | static unsigned int btrfs_flags_to_ioctl(unsigned int flags) | ||
68 | { | ||
69 | unsigned int iflags = 0; | ||
70 | |||
71 | if (flags & BTRFS_INODE_SYNC) | ||
72 | iflags |= FS_SYNC_FL; | ||
73 | if (flags & BTRFS_INODE_IMMUTABLE) | ||
74 | iflags |= FS_IMMUTABLE_FL; | ||
75 | if (flags & BTRFS_INODE_APPEND) | ||
76 | iflags |= FS_APPEND_FL; | ||
77 | if (flags & BTRFS_INODE_NODUMP) | ||
78 | iflags |= FS_NODUMP_FL; | ||
79 | if (flags & BTRFS_INODE_NOATIME) | ||
80 | iflags |= FS_NOATIME_FL; | ||
81 | if (flags & BTRFS_INODE_DIRSYNC) | ||
82 | iflags |= FS_DIRSYNC_FL; | ||
83 | |||
84 | return iflags; | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Update inode->i_flags based on the btrfs internal flags. | ||
89 | */ | ||
90 | void btrfs_update_iflags(struct inode *inode) | ||
91 | { | ||
92 | struct btrfs_inode *ip = BTRFS_I(inode); | ||
93 | |||
94 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | ||
95 | |||
96 | if (ip->flags & BTRFS_INODE_SYNC) | ||
97 | inode->i_flags |= S_SYNC; | ||
98 | if (ip->flags & BTRFS_INODE_IMMUTABLE) | ||
99 | inode->i_flags |= S_IMMUTABLE; | ||
100 | if (ip->flags & BTRFS_INODE_APPEND) | ||
101 | inode->i_flags |= S_APPEND; | ||
102 | if (ip->flags & BTRFS_INODE_NOATIME) | ||
103 | inode->i_flags |= S_NOATIME; | ||
104 | if (ip->flags & BTRFS_INODE_DIRSYNC) | ||
105 | inode->i_flags |= S_DIRSYNC; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Inherit flags from the parent inode. | ||
110 | * | ||
111 | * Unlike extN, we currently have no flags that we don't want to inherit. | ||
112 | */ | ||
113 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) | ||
114 | { | ||
115 | unsigned int flags; | ||
116 | |||
117 | if (!dir) | ||
118 | return; | ||
119 | |||
120 | flags = BTRFS_I(dir)->flags; | ||
121 | |||
122 | if (S_ISREG(inode->i_mode)) | ||
123 | flags &= ~BTRFS_INODE_DIRSYNC; | ||
124 | else if (!S_ISDIR(inode->i_mode)) | ||
125 | flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME); | ||
126 | |||
127 | BTRFS_I(inode)->flags = flags; | ||
128 | btrfs_update_iflags(inode); | ||
129 | } | ||
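To make the masking in btrfs_inherit_iflags() concrete, suppose the parent directory has BTRFS_INODE_DIRSYNC and BTRFS_INODE_NOATIME set (a hypothetical example):

	new subdirectory:   DIRSYNC | NOATIME   (directories inherit all flags)
	new regular file:   NOATIME             (DIRSYNC is masked off for files)
	new symlink/device: NOATIME             (only NODUMP and NOATIME survive)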
130 | |||
131 | static int btrfs_ioctl_getflags(struct file *file, void __user *arg) | ||
132 | { | ||
133 | struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); | ||
134 | unsigned int flags = btrfs_flags_to_ioctl(ip->flags); | ||
135 | |||
136 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
137 | return -EFAULT; | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | ||
142 | { | ||
143 | struct inode *inode = file->f_path.dentry->d_inode; | ||
144 | struct btrfs_inode *ip = BTRFS_I(inode); | ||
145 | struct btrfs_root *root = ip->root; | ||
146 | struct btrfs_trans_handle *trans; | ||
147 | unsigned int flags, oldflags; | ||
148 | int ret; | ||
149 | |||
150 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
151 | return -EFAULT; | ||
152 | |||
153 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | | ||
154 | FS_NOATIME_FL | FS_NODUMP_FL | | ||
155 | FS_SYNC_FL | FS_DIRSYNC_FL)) | ||
156 | return -EOPNOTSUPP; | ||
53 | 157 | ||
158 | if (!is_owner_or_cap(inode)) | ||
159 | return -EACCES; | ||
160 | |||
161 | mutex_lock(&inode->i_mutex); | ||
162 | |||
163 | flags = btrfs_mask_flags(inode->i_mode, flags); | ||
164 | oldflags = btrfs_flags_to_ioctl(ip->flags); | ||
165 | if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { | ||
166 | if (!capable(CAP_LINUX_IMMUTABLE)) { | ||
167 | ret = -EPERM; | ||
168 | goto out_unlock; | ||
169 | } | ||
170 | } | ||
171 | |||
172 | ret = mnt_want_write(file->f_path.mnt); | ||
173 | if (ret) | ||
174 | goto out_unlock; | ||
175 | |||
176 | if (flags & FS_SYNC_FL) | ||
177 | ip->flags |= BTRFS_INODE_SYNC; | ||
178 | else | ||
179 | ip->flags &= ~BTRFS_INODE_SYNC; | ||
180 | if (flags & FS_IMMUTABLE_FL) | ||
181 | ip->flags |= BTRFS_INODE_IMMUTABLE; | ||
182 | else | ||
183 | ip->flags &= ~BTRFS_INODE_IMMUTABLE; | ||
184 | if (flags & FS_APPEND_FL) | ||
185 | ip->flags |= BTRFS_INODE_APPEND; | ||
186 | else | ||
187 | ip->flags &= ~BTRFS_INODE_APPEND; | ||
188 | if (flags & FS_NODUMP_FL) | ||
189 | ip->flags |= BTRFS_INODE_NODUMP; | ||
190 | else | ||
191 | ip->flags &= ~BTRFS_INODE_NODUMP; | ||
192 | if (flags & FS_NOATIME_FL) | ||
193 | ip->flags |= BTRFS_INODE_NOATIME; | ||
194 | else | ||
195 | ip->flags &= ~BTRFS_INODE_NOATIME; | ||
196 | if (flags & FS_DIRSYNC_FL) | ||
197 | ip->flags |= BTRFS_INODE_DIRSYNC; | ||
198 | else | ||
199 | ip->flags &= ~BTRFS_INODE_DIRSYNC; | ||
200 | |||
201 | |||
202 | trans = btrfs_join_transaction(root, 1); | ||
203 | BUG_ON(!trans); | ||
204 | |||
205 | ret = btrfs_update_inode(trans, root, inode); | ||
206 | BUG_ON(ret); | ||
207 | |||
208 | btrfs_update_iflags(inode); | ||
209 | inode->i_ctime = CURRENT_TIME; | ||
210 | btrfs_end_transaction(trans, root); | ||
211 | |||
212 | mnt_drop_write(file->f_path.mnt); | ||
213 | out_unlock: | ||
214 | mutex_unlock(&inode->i_mutex); | ||
215 | return ret; | ||
216 | } | ||
217 | |||
218 | static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | ||
219 | { | ||
220 | struct inode *inode = file->f_path.dentry->d_inode; | ||
221 | |||
222 | return put_user(inode->i_generation, arg); | ||
223 | } | ||
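Since these handlers implement the standard FS_IOC_* interface, generic userspace tools such as chattr/lsattr work unchanged on btrfs. A minimal caller sketch (the path is a placeholder; the kernel side above copies an int-sized flags word):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>

	int main(void)
	{
		int flags, version;
		int fd = open("/mnt/btrfs/file", O_RDONLY);	/* placeholder path */

		if (fd < 0)
			return 1;
		if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0)
			printf("flags: 0x%x\n", flags);
		flags |= FS_NOATIME_FL;		/* the bit chattr +A sets */
		if (ioctl(fd, FS_IOC_SETFLAGS, &flags) != 0)
			perror("FS_IOC_SETFLAGS");
		if (ioctl(fd, FS_IOC_GETVERSION, &version) == 0)
			printf("generation: %d\n", version);
		close(fd);
		return 0;
	}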
54 | 224 | ||
55 | static noinline int create_subvol(struct btrfs_root *root, | 225 | static noinline int create_subvol(struct btrfs_root *root, |
56 | struct dentry *dentry, | 226 | struct dentry *dentry, |
@@ -82,22 +252,25 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
82 | if (ret) | 252 | if (ret) |
83 | goto fail; | 253 | goto fail; |
84 | 254 | ||
85 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 255 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
86 | objectid, trans->transid, 0, 0, 0); | 256 | 0, objectid, NULL, 0, 0, 0); |
87 | if (IS_ERR(leaf)) { | 257 | if (IS_ERR(leaf)) { |
88 | ret = PTR_ERR(leaf); | 258 | ret = PTR_ERR(leaf); |
89 | goto fail; | 259 | goto fail; |
90 | } | 260 | } |
91 | 261 | ||
92 | btrfs_set_header_nritems(leaf, 0); | 262 | memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); |
93 | btrfs_set_header_level(leaf, 0); | ||
94 | btrfs_set_header_bytenr(leaf, leaf->start); | 263 | btrfs_set_header_bytenr(leaf, leaf->start); |
95 | btrfs_set_header_generation(leaf, trans->transid); | 264 | btrfs_set_header_generation(leaf, trans->transid); |
265 | btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); | ||
96 | btrfs_set_header_owner(leaf, objectid); | 266 | btrfs_set_header_owner(leaf, objectid); |
97 | 267 | ||
98 | write_extent_buffer(leaf, root->fs_info->fsid, | 268 | write_extent_buffer(leaf, root->fs_info->fsid, |
99 | (unsigned long)btrfs_header_fsid(leaf), | 269 | (unsigned long)btrfs_header_fsid(leaf), |
100 | BTRFS_FSID_SIZE); | 270 | BTRFS_FSID_SIZE); |
271 | write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, | ||
272 | (unsigned long)btrfs_header_chunk_tree_uuid(leaf), | ||
273 | BTRFS_UUID_SIZE); | ||
101 | btrfs_mark_buffer_dirty(leaf); | 274 | btrfs_mark_buffer_dirty(leaf); |
102 | 275 | ||
103 | inode_item = &root_item.inode; | 276 | inode_item = &root_item.inode; |
@@ -125,7 +298,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
125 | btrfs_set_root_dirid(&root_item, new_dirid); | 298 | btrfs_set_root_dirid(&root_item, new_dirid); |
126 | 299 | ||
127 | key.objectid = objectid; | 300 | key.objectid = objectid; |
128 | key.offset = 1; | 301 | key.offset = 0; |
129 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 302 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
130 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 303 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, |
131 | &root_item); | 304 | &root_item); |
@@ -911,10 +1084,10 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
911 | if (disko) { | 1084 | if (disko) { |
912 | inode_add_bytes(inode, datal); | 1085 | inode_add_bytes(inode, datal); |
913 | ret = btrfs_inc_extent_ref(trans, root, | 1086 | ret = btrfs_inc_extent_ref(trans, root, |
914 | disko, diskl, leaf->start, | 1087 | disko, diskl, 0, |
915 | root->root_key.objectid, | 1088 | root->root_key.objectid, |
916 | trans->transid, | 1089 | inode->i_ino, |
917 | inode->i_ino); | 1090 | new_key.offset - datao); |
918 | BUG_ON(ret); | 1091 | BUG_ON(ret); |
919 | } | 1092 | } |
920 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | 1093 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { |
@@ -1074,6 +1247,12 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
1074 | void __user *argp = (void __user *)arg; | 1247 | void __user *argp = (void __user *)arg; |
1075 | 1248 | ||
1076 | switch (cmd) { | 1249 | switch (cmd) { |
1250 | case FS_IOC_GETFLAGS: | ||
1251 | return btrfs_ioctl_getflags(file, argp); | ||
1252 | case FS_IOC_SETFLAGS: | ||
1253 | return btrfs_ioctl_setflags(file, argp); | ||
1254 | case FS_IOC_GETVERSION: | ||
1255 | return btrfs_ioctl_getversion(file, argp); | ||
1077 | case BTRFS_IOC_SNAP_CREATE: | 1256 | case BTRFS_IOC_SNAP_CREATE: |
1078 | return btrfs_ioctl_snap_create(file, argp, 0); | 1257 | return btrfs_ioctl_snap_create(file, argp, 0); |
1079 | case BTRFS_IOC_SUBVOL_CREATE: | 1258 | case BTRFS_IOC_SUBVOL_CREATE: |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 5f8f218c1005..6d6523da0a30 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -45,22 +45,132 @@ static void print_dev_item(struct extent_buffer *eb, | |||
45 | (unsigned long long)btrfs_device_total_bytes(eb, dev_item), | 45 | (unsigned long long)btrfs_device_total_bytes(eb, dev_item), |
46 | (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); | 46 | (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); |
47 | } | 47 | } |
48 | static void print_extent_data_ref(struct extent_buffer *eb, | ||
49 | struct btrfs_extent_data_ref *ref) | ||
50 | { | ||
51 | printk(KERN_INFO "\t\textent data backref root %llu " | ||
52 | "objectid %llu offset %llu count %u\n", | ||
53 | (unsigned long long)btrfs_extent_data_ref_root(eb, ref), | ||
54 | (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref), | ||
55 | (unsigned long long)btrfs_extent_data_ref_offset(eb, ref), | ||
56 | btrfs_extent_data_ref_count(eb, ref)); | ||
57 | } | ||
58 | |||
59 | static void print_extent_item(struct extent_buffer *eb, int slot) | ||
60 | { | ||
61 | struct btrfs_extent_item *ei; | ||
62 | struct btrfs_extent_inline_ref *iref; | ||
63 | struct btrfs_extent_data_ref *dref; | ||
64 | struct btrfs_shared_data_ref *sref; | ||
65 | struct btrfs_disk_key key; | ||
66 | unsigned long end; | ||
67 | unsigned long ptr; | ||
68 | int type; | ||
69 | u32 item_size = btrfs_item_size_nr(eb, slot); | ||
70 | u64 flags; | ||
71 | u64 offset; | ||
72 | |||
73 | if (item_size < sizeof(*ei)) { | ||
74 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
75 | struct btrfs_extent_item_v0 *ei0; | ||
76 | BUG_ON(item_size != sizeof(*ei0)); | ||
77 | ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0); | ||
78 | printk(KERN_INFO "\t\textent refs %u\n", | ||
79 | btrfs_extent_refs_v0(eb, ei0)); | ||
80 | return; | ||
81 | #else | ||
82 | BUG(); | ||
83 | #endif | ||
84 | } | ||
85 | |||
86 | ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); | ||
87 | flags = btrfs_extent_flags(eb, ei); | ||
88 | |||
89 | printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n", | ||
90 | (unsigned long long)btrfs_extent_refs(eb, ei), | ||
91 | (unsigned long long)btrfs_extent_generation(eb, ei), | ||
92 | (unsigned long long)flags); | ||
93 | |||
94 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
95 | struct btrfs_tree_block_info *info; | ||
96 | info = (struct btrfs_tree_block_info *)(ei + 1); | ||
97 | btrfs_tree_block_key(eb, info, &key); | ||
98 | printk(KERN_INFO "\t\ttree block key (%llu %x %llu) " | ||
99 | "level %d\n", | ||
100 | (unsigned long long)btrfs_disk_key_objectid(&key), | ||
101 | key.type, | ||
102 | (unsigned long long)btrfs_disk_key_offset(&key), | ||
103 | btrfs_tree_block_level(eb, info)); | ||
104 | iref = (struct btrfs_extent_inline_ref *)(info + 1); | ||
105 | } else { | ||
106 | iref = (struct btrfs_extent_inline_ref *)(ei + 1); | ||
107 | } | ||
108 | |||
109 | ptr = (unsigned long)iref; | ||
110 | end = (unsigned long)ei + item_size; | ||
111 | while (ptr < end) { | ||
112 | iref = (struct btrfs_extent_inline_ref *)ptr; | ||
113 | type = btrfs_extent_inline_ref_type(eb, iref); | ||
114 | offset = btrfs_extent_inline_ref_offset(eb, iref); | ||
115 | switch (type) { | ||
116 | case BTRFS_TREE_BLOCK_REF_KEY: | ||
117 | printk(KERN_INFO "\t\ttree block backref " | ||
118 | "root %llu\n", (unsigned long long)offset); | ||
119 | break; | ||
120 | case BTRFS_SHARED_BLOCK_REF_KEY: | ||
121 | printk(KERN_INFO "\t\tshared block backref " | ||
122 | "parent %llu\n", (unsigned long long)offset); | ||
123 | break; | ||
124 | case BTRFS_EXTENT_DATA_REF_KEY: | ||
125 | dref = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
126 | print_extent_data_ref(eb, dref); | ||
127 | break; | ||
128 | case BTRFS_SHARED_DATA_REF_KEY: | ||
129 | sref = (struct btrfs_shared_data_ref *)(iref + 1); | ||
130 | printk(KERN_INFO "\t\tshared data backref " | ||
131 | "parent %llu count %u\n", | ||
132 | (unsigned long long)offset, | ||
133 | btrfs_shared_data_ref_count(eb, sref)); | ||
134 | break; | ||
135 | default: | ||
136 | BUG(); | ||
137 | } | ||
138 | ptr += btrfs_extent_inline_ref_size(type); | ||
139 | } | ||
140 | WARN_ON(ptr > end); | ||
141 | } | ||
142 | |||
143 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
144 | static void print_extent_ref_v0(struct extent_buffer *eb, int slot) | ||
145 | { | ||
146 | struct btrfs_extent_ref_v0 *ref0; | ||
147 | |||
148 | ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); | ||
149 | printk(KERN_INFO "\t\textent back ref root %llu gen %llu " | ||
150 | "owner %llu num_refs %lu\n", | ||
151 | (unsigned long long)btrfs_ref_root_v0(eb, ref0), | ||
152 | (unsigned long long)btrfs_ref_generation_v0(eb, ref0), | ||
153 | (unsigned long long)btrfs_ref_objectid_v0(eb, ref0), | ||
154 | (unsigned long)btrfs_ref_count_v0(eb, ref0)); | ||
155 | } | ||
156 | #endif | ||
157 | |||
48 | void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | 158 | void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) |
49 | { | 159 | { |
50 | int i; | 160 | int i; |
161 | u32 type; | ||
51 | u32 nr = btrfs_header_nritems(l); | 162 | u32 nr = btrfs_header_nritems(l); |
52 | struct btrfs_item *item; | 163 | struct btrfs_item *item; |
53 | struct btrfs_extent_item *ei; | ||
54 | struct btrfs_root_item *ri; | 164 | struct btrfs_root_item *ri; |
55 | struct btrfs_dir_item *di; | 165 | struct btrfs_dir_item *di; |
56 | struct btrfs_inode_item *ii; | 166 | struct btrfs_inode_item *ii; |
57 | struct btrfs_block_group_item *bi; | 167 | struct btrfs_block_group_item *bi; |
58 | struct btrfs_file_extent_item *fi; | 168 | struct btrfs_file_extent_item *fi; |
169 | struct btrfs_extent_data_ref *dref; | ||
170 | struct btrfs_shared_data_ref *sref; | ||
171 | struct btrfs_dev_extent *dev_extent; | ||
59 | struct btrfs_key key; | 172 | struct btrfs_key key; |
60 | struct btrfs_key found_key; | 173 | struct btrfs_key found_key; |
61 | struct btrfs_extent_ref *ref; | ||
62 | struct btrfs_dev_extent *dev_extent; | ||
63 | u32 type; | ||
64 | 174 | ||
65 | printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", | 175 | printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", |
66 | (unsigned long long)btrfs_header_bytenr(l), nr, | 176 | (unsigned long long)btrfs_header_bytenr(l), nr, |
@@ -100,20 +210,25 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
100 | btrfs_disk_root_refs(l, ri)); | 210 | btrfs_disk_root_refs(l, ri)); |
101 | break; | 211 | break; |
102 | case BTRFS_EXTENT_ITEM_KEY: | 212 | case BTRFS_EXTENT_ITEM_KEY: |
103 | ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); | 213 | print_extent_item(l, i); |
104 | printk(KERN_INFO "\t\textent data refs %u\n", | 214 | break; |
105 | btrfs_extent_refs(l, ei)); | 215 | case BTRFS_TREE_BLOCK_REF_KEY: |
106 | break; | 216 | printk(KERN_INFO "\t\ttree block backref\n"); |
107 | case BTRFS_EXTENT_REF_KEY: | 217 | break; |
108 | ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); | 218 | case BTRFS_SHARED_BLOCK_REF_KEY: |
109 | printk(KERN_INFO "\t\textent back ref root %llu " | 219 | printk(KERN_INFO "\t\tshared block backref\n"); |
110 | "gen %llu owner %llu num_refs %lu\n", | 220 | break; |
111 | (unsigned long long)btrfs_ref_root(l, ref), | 221 | case BTRFS_EXTENT_DATA_REF_KEY: |
112 | (unsigned long long)btrfs_ref_generation(l, ref), | 222 | dref = btrfs_item_ptr(l, i, |
113 | (unsigned long long)btrfs_ref_objectid(l, ref), | 223 | struct btrfs_extent_data_ref); |
114 | (unsigned long)btrfs_ref_num_refs(l, ref)); | 224 | print_extent_data_ref(l, dref); |
225 | break; | ||
226 | case BTRFS_SHARED_DATA_REF_KEY: | ||
227 | sref = btrfs_item_ptr(l, i, | ||
228 | struct btrfs_shared_data_ref); | ||
229 | printk(KERN_INFO "\t\tshared data backref count %u\n", | ||
230 | btrfs_shared_data_ref_count(l, sref)); | ||
115 | break; | 231 | break; |
116 | |||
117 | case BTRFS_EXTENT_DATA_KEY: | 232 | case BTRFS_EXTENT_DATA_KEY: |
118 | fi = btrfs_item_ptr(l, i, | 233 | fi = btrfs_item_ptr(l, i, |
119 | struct btrfs_file_extent_item); | 234 | struct btrfs_file_extent_item); |
@@ -139,6 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
139 | (unsigned long long) | 254 | (unsigned long long) |
140 | btrfs_file_extent_ram_bytes(l, fi)); | 255 | btrfs_file_extent_ram_bytes(l, fi)); |
141 | break; | 256 | break; |
257 | case BTRFS_EXTENT_REF_V0_KEY: | ||
258 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
259 | print_extent_ref_v0(l, i); break; | ||
260 | #else | ||
261 | BUG(); | ||
262 | #endif | ||
142 | case BTRFS_BLOCK_GROUP_ITEM_KEY: | 263 | case BTRFS_BLOCK_GROUP_ITEM_KEY: |
143 | bi = btrfs_item_ptr(l, i, | 264 | bi = btrfs_item_ptr(l, i, |
144 | struct btrfs_block_group_item); | 265 | struct btrfs_block_group_item); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c new file mode 100644 index 000000000000..b23dc209ae10 --- /dev/null +++ b/fs/btrfs/relocation.c | |||
@@ -0,0 +1,3711 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/writeback.h> | ||
22 | #include <linux/blkdev.h> | ||
23 | #include <linux/rbtree.h> | ||
24 | #include "ctree.h" | ||
25 | #include "disk-io.h" | ||
26 | #include "transaction.h" | ||
27 | #include "volumes.h" | ||
28 | #include "locking.h" | ||
29 | #include "btrfs_inode.h" | ||
30 | #include "async-thread.h" | ||
31 | |||
32 | /* | ||
33 | * backref_node, mapping_node and tree_block start with this | ||
34 | */ | ||
35 | struct tree_entry { | ||
36 | struct rb_node rb_node; | ||
37 | u64 bytenr; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * represents a tree block in the backref cache | ||
42 | */ | ||
43 | struct backref_node { | ||
44 | struct rb_node rb_node; | ||
45 | u64 bytenr; | ||
46 | /* objectid of the tree block owner */ | ||
47 | u64 owner; | ||
48 | /* list of upper level blocks that reference this block */ | ||
49 | struct list_head upper; | ||
50 | /* list of child blocks in the cache */ | ||
51 | struct list_head lower; | ||
52 | /* NULL if this node is not a tree root */ | ||
53 | struct btrfs_root *root; | ||
54 | /* extent buffer obtained by COWing the block */ | ||
55 | struct extent_buffer *eb; | ||
56 | /* level of tree block */ | ||
57 | unsigned int level:8; | ||
58 | /* 1 if the block is the root of an old snapshot */ | ||
59 | unsigned int old_root:1; | ||
60 | /* 1 if no child blocks in the cache */ | ||
61 | unsigned int lowest:1; | ||
62 | /* is the extent buffer locked */ | ||
63 | unsigned int locked:1; | ||
64 | /* has the block been processed */ | ||
65 | unsigned int processed:1; | ||
66 | /* have backrefs of this block been checked */ | ||
67 | unsigned int checked:1; | ||
68 | }; | ||
69 | |||
70 | /* | ||
71 | * represents a block pointer in the backref cache | ||
72 | */ | ||
73 | struct backref_edge { | ||
74 | struct list_head list[2]; | ||
75 | struct backref_node *node[2]; | ||
76 | u64 blockptr; | ||
77 | }; | ||
78 | |||
79 | #define LOWER 0 | ||
80 | #define UPPER 1 | ||
81 | |||
82 | struct backref_cache { | ||
83 | /* red black tree of all backref nodes in the cache */ | ||
84 | struct rb_root rb_root; | ||
85 | /* list of backref nodes with no child block in the cache */ | ||
86 | struct list_head pending[BTRFS_MAX_LEVEL]; | ||
87 | spinlock_t lock; | ||
88 | }; | ||
89 | |||
90 | /* | ||
91 | * map address of tree root to tree | ||
92 | */ | ||
93 | struct mapping_node { | ||
94 | struct rb_node rb_node; | ||
95 | u64 bytenr; | ||
96 | void *data; | ||
97 | }; | ||
98 | |||
99 | struct mapping_tree { | ||
100 | struct rb_root rb_root; | ||
101 | spinlock_t lock; | ||
102 | }; | ||
103 | |||
104 | /* | ||
105 | * represents a tree block to process | ||
106 | */ | ||
107 | struct tree_block { | ||
108 | struct rb_node rb_node; | ||
109 | u64 bytenr; | ||
110 | struct btrfs_key key; | ||
111 | unsigned int level:8; | ||
112 | unsigned int key_ready:1; | ||
113 | }; | ||
114 | |||
115 | /* inode vector */ | ||
116 | #define INODEVEC_SIZE 16 | ||
117 | |||
118 | struct inodevec { | ||
119 | struct list_head list; | ||
120 | struct inode *inode[INODEVEC_SIZE]; | ||
121 | int nr; | ||
122 | }; | ||
123 | |||
124 | struct reloc_control { | ||
125 | /* block group to relocate */ | ||
126 | struct btrfs_block_group_cache *block_group; | ||
127 | /* extent tree */ | ||
128 | struct btrfs_root *extent_root; | ||
129 | /* inode for moving data */ | ||
130 | struct inode *data_inode; | ||
131 | struct btrfs_workers workers; | ||
132 | /* tree blocks have been processed */ | ||
133 | struct extent_io_tree processed_blocks; | ||
134 | /* map start of tree root to corresponding reloc tree */ | ||
135 | struct mapping_tree reloc_root_tree; | ||
136 | /* list of reloc trees */ | ||
137 | struct list_head reloc_roots; | ||
138 | u64 search_start; | ||
139 | u64 extents_found; | ||
140 | u64 extents_skipped; | ||
141 | int stage; | ||
142 | int create_reloc_root; | ||
143 | unsigned int found_file_extent:1; | ||
144 | unsigned int found_old_snapshot:1; | ||
145 | }; | ||
146 | |||
147 | /* stages of data relocation */ | ||
148 | #define MOVE_DATA_EXTENTS 0 | ||
149 | #define UPDATE_DATA_PTRS 1 | ||
150 | |||
151 | /* | ||
152 | * merge reloc trees into their corresponding fs trees in worker threads | ||
153 | */ | ||
154 | struct async_merge { | ||
155 | struct btrfs_work work; | ||
156 | struct reloc_control *rc; | ||
157 | struct btrfs_root *root; | ||
158 | struct completion *done; | ||
159 | atomic_t *num_pending; | ||
160 | }; | ||
161 | |||
162 | static void mapping_tree_init(struct mapping_tree *tree) | ||
163 | { | ||
164 | tree->rb_root.rb_node = NULL; | ||
165 | spin_lock_init(&tree->lock); | ||
166 | } | ||
167 | |||
168 | static void backref_cache_init(struct backref_cache *cache) | ||
169 | { | ||
170 | int i; | ||
171 | cache->rb_root.rb_node = NULL; | ||
172 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | ||
173 | INIT_LIST_HEAD(&cache->pending[i]); | ||
174 | spin_lock_init(&cache->lock); | ||
175 | } | ||
176 | |||
177 | static void backref_node_init(struct backref_node *node) | ||
178 | { | ||
179 | memset(node, 0, sizeof(*node)); | ||
180 | INIT_LIST_HEAD(&node->upper); | ||
181 | INIT_LIST_HEAD(&node->lower); | ||
182 | RB_CLEAR_NODE(&node->rb_node); | ||
183 | } | ||
184 | |||
185 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | ||
186 | struct rb_node *node) | ||
187 | { | ||
188 | struct rb_node **p = &root->rb_node; | ||
189 | struct rb_node *parent = NULL; | ||
190 | struct tree_entry *entry; | ||
191 | |||
192 | while (*p) { | ||
193 | parent = *p; | ||
194 | entry = rb_entry(parent, struct tree_entry, rb_node); | ||
195 | |||
196 | if (bytenr < entry->bytenr) | ||
197 | p = &(*p)->rb_left; | ||
198 | else if (bytenr > entry->bytenr) | ||
199 | p = &(*p)->rb_right; | ||
200 | else | ||
201 | return parent; | ||
202 | } | ||
203 | |||
204 | rb_link_node(node, parent, p); | ||
205 | rb_insert_color(node, root); | ||
206 | return NULL; | ||
207 | } | ||
208 | |||
209 | static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) | ||
210 | { | ||
211 | struct rb_node *n = root->rb_node; | ||
212 | struct tree_entry *entry; | ||
213 | |||
214 | while (n) { | ||
215 | entry = rb_entry(n, struct tree_entry, rb_node); | ||
216 | |||
217 | if (bytenr < entry->bytenr) | ||
218 | n = n->rb_left; | ||
219 | else if (bytenr > entry->bytenr) | ||
220 | n = n->rb_right; | ||
221 | else | ||
222 | return n; | ||
223 | } | ||
224 | return NULL; | ||
225 | } | ||
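tree_insert() and tree_search() work on backref_node, mapping_node and tree_block alike because all three begin with the same rb_node + bytenr layout as struct tree_entry. A usage sketch (illustration only; error handling elided, some_root is a placeholder):

	struct mapping_node *mn = kmalloc(sizeof(*mn), GFP_NOFS);
	struct rb_node *rb;

	mn->bytenr = bytenr;
	mn->data = some_root;			/* placeholder payload */
	rb = tree_insert(&tree->rb_root, mn->bytenr, &mn->rb_node);
	BUG_ON(rb);				/* non-NULL means the key already exists */

	rb = tree_search(&tree->rb_root, bytenr);
	if (rb)
		mn = rb_entry(rb, struct mapping_node, rb_node);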
226 | |||
227 | /* | ||
228 | * walk up backref nodes until reaching a node that represents a tree root | ||
229 | */ | ||
230 | static struct backref_node *walk_up_backref(struct backref_node *node, | ||
231 | struct backref_edge *edges[], | ||
232 | int *index) | ||
233 | { | ||
234 | struct backref_edge *edge; | ||
235 | int idx = *index; | ||
236 | |||
237 | while (!list_empty(&node->upper)) { | ||
238 | edge = list_entry(node->upper.next, | ||
239 | struct backref_edge, list[LOWER]); | ||
240 | edges[idx++] = edge; | ||
241 | node = edge->node[UPPER]; | ||
242 | } | ||
243 | *index = idx; | ||
244 | return node; | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * walk down backref nodes to find the start of the next reference path | ||
249 | */ | ||
250 | static struct backref_node *walk_down_backref(struct backref_edge *edges[], | ||
251 | int *index) | ||
252 | { | ||
253 | struct backref_edge *edge; | ||
254 | struct backref_node *lower; | ||
255 | int idx = *index; | ||
256 | |||
257 | while (idx > 0) { | ||
258 | edge = edges[idx - 1]; | ||
259 | lower = edge->node[LOWER]; | ||
260 | if (list_is_last(&edge->list[LOWER], &lower->upper)) { | ||
261 | idx--; | ||
262 | continue; | ||
263 | } | ||
264 | edge = list_entry(edge->list[LOWER].next, | ||
265 | struct backref_edge, list[LOWER]); | ||
266 | edges[idx - 1] = edge; | ||
267 | *index = idx; | ||
268 | return edge->node[UPPER]; | ||
269 | } | ||
270 | *index = 0; | ||
271 | return NULL; | ||
272 | } | ||
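Together, walk_up_backref() and walk_down_backref() form an iterative depth-first traversal of every reference path from a block up to the tree roots above it. A caller-side sketch (process_root() is a hypothetical callback; the real callers later in this file follow the same shape):

	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	struct backref_node *next = node;
	int index = 0;

	while (next) {
		/* climb the first unvisited path until we hit a tree root */
		next = walk_up_backref(next, edges, &index);
		process_root(next->root);	/* hypothetical per-root work */
		/* step back down to the deepest node with another parent edge */
		next = walk_down_backref(edges, &index);
	}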
273 | |||
274 | static void drop_node_buffer(struct backref_node *node) | ||
275 | { | ||
276 | if (node->eb) { | ||
277 | if (node->locked) { | ||
278 | btrfs_tree_unlock(node->eb); | ||
279 | node->locked = 0; | ||
280 | } | ||
281 | free_extent_buffer(node->eb); | ||
282 | node->eb = NULL; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | static void drop_backref_node(struct backref_cache *tree, | ||
287 | struct backref_node *node) | ||
288 | { | ||
289 | BUG_ON(!node->lowest); | ||
290 | BUG_ON(!list_empty(&node->upper)); | ||
291 | |||
292 | drop_node_buffer(node); | ||
293 | list_del(&node->lower); | ||
294 | |||
295 | rb_erase(&node->rb_node, &tree->rb_root); | ||
296 | kfree(node); | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * remove a backref node from the backref cache | ||
301 | */ | ||
302 | static void remove_backref_node(struct backref_cache *cache, | ||
303 | struct backref_node *node) | ||
304 | { | ||
305 | struct backref_node *upper; | ||
306 | struct backref_edge *edge; | ||
307 | |||
308 | if (!node) | ||
309 | return; | ||
310 | |||
311 | BUG_ON(!node->lowest); | ||
312 | while (!list_empty(&node->upper)) { | ||
313 | edge = list_entry(node->upper.next, struct backref_edge, | ||
314 | list[LOWER]); | ||
315 | upper = edge->node[UPPER]; | ||
316 | list_del(&edge->list[LOWER]); | ||
317 | list_del(&edge->list[UPPER]); | ||
318 | kfree(edge); | ||
319 | /* | ||
320 | * add the node to the pending list if no other | ||
321 | * child block is cached. | ||
322 | */ | ||
323 | if (list_empty(&upper->lower)) { | ||
324 | list_add_tail(&upper->lower, | ||
325 | &cache->pending[upper->level]); | ||
326 | upper->lowest = 1; | ||
327 | } | ||
328 | } | ||
329 | drop_backref_node(cache, node); | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * find reloc tree by address of tree root | ||
334 | */ | ||
335 | static struct btrfs_root *find_reloc_root(struct reloc_control *rc, | ||
336 | u64 bytenr) | ||
337 | { | ||
338 | struct rb_node *rb_node; | ||
339 | struct mapping_node *node; | ||
340 | struct btrfs_root *root = NULL; | ||
341 | |||
342 | spin_lock(&rc->reloc_root_tree.lock); | ||
343 | rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr); | ||
344 | if (rb_node) { | ||
345 | node = rb_entry(rb_node, struct mapping_node, rb_node); | ||
346 | root = (struct btrfs_root *)node->data; | ||
347 | } | ||
348 | spin_unlock(&rc->reloc_root_tree.lock); | ||
349 | return root; | ||
350 | } | ||
351 | |||
352 | static int is_cowonly_root(u64 root_objectid) | ||
353 | { | ||
354 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || | ||
355 | root_objectid == BTRFS_EXTENT_TREE_OBJECTID || | ||
356 | root_objectid == BTRFS_CHUNK_TREE_OBJECTID || | ||
357 | root_objectid == BTRFS_DEV_TREE_OBJECTID || | ||
358 | root_objectid == BTRFS_TREE_LOG_OBJECTID || | ||
359 | root_objectid == BTRFS_CSUM_TREE_OBJECTID) | ||
360 | return 1; | ||
361 | return 0; | ||
362 | } | ||
363 | |||
364 | static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, | ||
365 | u64 root_objectid) | ||
366 | { | ||
367 | struct btrfs_key key; | ||
368 | |||
369 | key.objectid = root_objectid; | ||
370 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
371 | if (is_cowonly_root(root_objectid)) | ||
372 | key.offset = 0; | ||
373 | else | ||
374 | key.offset = (u64)-1; | ||
375 | |||
376 | return btrfs_read_fs_root_no_name(fs_info, &key); | ||
377 | } | ||
378 | |||
379 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
380 | static noinline_for_stack | ||
381 | struct btrfs_root *find_tree_root(struct reloc_control *rc, | ||
382 | struct extent_buffer *leaf, | ||
383 | struct btrfs_extent_ref_v0 *ref0) | ||
384 | { | ||
385 | struct btrfs_root *root; | ||
386 | u64 root_objectid = btrfs_ref_root_v0(leaf, ref0); | ||
387 | u64 generation = btrfs_ref_generation_v0(leaf, ref0); | ||
388 | |||
389 | BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID); | ||
390 | |||
391 | root = read_fs_root(rc->extent_root->fs_info, root_objectid); | ||
392 | BUG_ON(IS_ERR(root)); | ||
393 | |||
394 | if (root->ref_cows && | ||
395 | generation != btrfs_root_generation(&root->root_item)) | ||
396 | return NULL; | ||
397 | |||
398 | return root; | ||
399 | } | ||
400 | #endif | ||
401 | |||
402 | static noinline_for_stack | ||
403 | int find_inline_backref(struct extent_buffer *leaf, int slot, | ||
404 | unsigned long *ptr, unsigned long *end) | ||
405 | { | ||
406 | struct btrfs_extent_item *ei; | ||
407 | struct btrfs_tree_block_info *bi; | ||
408 | u32 item_size; | ||
409 | |||
410 | item_size = btrfs_item_size_nr(leaf, slot); | ||
411 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
412 | if (item_size < sizeof(*ei)) { | ||
413 | WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); | ||
414 | return 1; | ||
415 | } | ||
416 | #endif | ||
417 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); | ||
418 | WARN_ON(!(btrfs_extent_flags(leaf, ei) & | ||
419 | BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
420 | |||
421 | if (item_size <= sizeof(*ei) + sizeof(*bi)) { | ||
422 | WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); | ||
423 | return 1; | ||
424 | } | ||
425 | |||
426 | bi = (struct btrfs_tree_block_info *)(ei + 1); | ||
427 | *ptr = (unsigned long)(bi + 1); | ||
428 | *end = (unsigned long)ei + item_size; | ||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | * build backref tree for a given tree block. root of the backref tree | ||
434 | * corresponds to the tree block; leaves of the backref tree correspond | ||
435 | * to roots of b-trees that reference the tree block. | ||
436 | * | ||
437 | * the basic idea of this function is to check backrefs of a given block | ||
438 | * to find upper level blocks that reference the block, and then check | ||
439 | * backrefs of these upper level blocks recursively. the recursion stops | ||
440 | * when a tree root is reached or backrefs for the block are cached. | ||
441 | * | ||
442 | * NOTE: if we find backrefs for a block are cached, we know backrefs | ||
443 | * for all upper level blocks that directly/indirectly reference the | ||
444 | * block are also cached. | ||
445 | */ | ||
446 | static struct backref_node *build_backref_tree(struct reloc_control *rc, | ||
447 | struct backref_cache *cache, | ||
448 | struct btrfs_key *node_key, | ||
449 | int level, u64 bytenr) | ||
450 | { | ||
451 | struct btrfs_path *path1; | ||
452 | struct btrfs_path *path2; | ||
453 | struct extent_buffer *eb; | ||
454 | struct btrfs_root *root; | ||
455 | struct backref_node *cur; | ||
456 | struct backref_node *upper; | ||
457 | struct backref_node *lower; | ||
458 | struct backref_node *node = NULL; | ||
459 | struct backref_node *exist = NULL; | ||
460 | struct backref_edge *edge; | ||
461 | struct rb_node *rb_node; | ||
462 | struct btrfs_key key; | ||
463 | unsigned long end; | ||
464 | unsigned long ptr; | ||
465 | LIST_HEAD(list); | ||
466 | int ret; | ||
467 | int err = 0; | ||
468 | |||
469 | path1 = btrfs_alloc_path(); | ||
470 | path2 = btrfs_alloc_path(); | ||
471 | if (!path1 || !path2) { | ||
472 | err = -ENOMEM; | ||
473 | goto out; | ||
474 | } | ||
475 | |||
476 | node = kmalloc(sizeof(*node), GFP_NOFS); | ||
477 | if (!node) { | ||
478 | err = -ENOMEM; | ||
479 | goto out; | ||
480 | } | ||
481 | |||
482 | backref_node_init(node); | ||
483 | node->bytenr = bytenr; | ||
484 | node->owner = 0; | ||
485 | node->level = level; | ||
486 | node->lowest = 1; | ||
487 | cur = node; | ||
488 | again: | ||
489 | end = 0; | ||
490 | ptr = 0; | ||
491 | key.objectid = cur->bytenr; | ||
492 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
493 | key.offset = (u64)-1; | ||
494 | |||
495 | path1->search_commit_root = 1; | ||
496 | path1->skip_locking = 1; | ||
497 | ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1, | ||
498 | 0, 0); | ||
499 | if (ret < 0) { | ||
500 | err = ret; | ||
501 | goto out; | ||
502 | } | ||
503 | BUG_ON(!ret || !path1->slots[0]); | ||
504 | |||
505 | path1->slots[0]--; | ||
506 | |||
507 | WARN_ON(cur->checked); | ||
508 | if (!list_empty(&cur->upper)) { | ||
509 | /* | ||
510 | * the backref was added previously when processing | ||
511 | * a backref of type BTRFS_TREE_BLOCK_REF_KEY | ||
512 | */ | ||
513 | BUG_ON(!list_is_singular(&cur->upper)); | ||
514 | edge = list_entry(cur->upper.next, struct backref_edge, | ||
515 | list[LOWER]); | ||
516 | BUG_ON(!list_empty(&edge->list[UPPER])); | ||
517 | exist = edge->node[UPPER]; | ||
518 | /* | ||
519 | * add the upper level block to the pending list if we | ||
520 | * need to check its backrefs | ||
521 | */ | ||
522 | if (!exist->checked) | ||
523 | list_add_tail(&edge->list[UPPER], &list); | ||
524 | } else { | ||
525 | exist = NULL; | ||
526 | } | ||
527 | |||
528 | while (1) { | ||
529 | cond_resched(); | ||
530 | eb = path1->nodes[0]; | ||
531 | |||
532 | if (ptr >= end) { | ||
533 | if (path1->slots[0] >= btrfs_header_nritems(eb)) { | ||
534 | ret = btrfs_next_leaf(rc->extent_root, path1); | ||
535 | if (ret < 0) { | ||
536 | err = ret; | ||
537 | goto out; | ||
538 | } | ||
539 | if (ret > 0) | ||
540 | break; | ||
541 | eb = path1->nodes[0]; | ||
542 | } | ||
543 | |||
544 | btrfs_item_key_to_cpu(eb, &key, path1->slots[0]); | ||
545 | if (key.objectid != cur->bytenr) { | ||
546 | WARN_ON(exist); | ||
547 | break; | ||
548 | } | ||
549 | |||
550 | if (key.type == BTRFS_EXTENT_ITEM_KEY) { | ||
551 | ret = find_inline_backref(eb, path1->slots[0], | ||
552 | &ptr, &end); | ||
553 | if (ret) | ||
554 | goto next; | ||
555 | } | ||
556 | } | ||
557 | |||
558 | if (ptr < end) { | ||
559 | /* update key for inline back ref */ | ||
560 | struct btrfs_extent_inline_ref *iref; | ||
561 | iref = (struct btrfs_extent_inline_ref *)ptr; | ||
562 | key.type = btrfs_extent_inline_ref_type(eb, iref); | ||
563 | key.offset = btrfs_extent_inline_ref_offset(eb, iref); | ||
564 | WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY && | ||
565 | key.type != BTRFS_SHARED_BLOCK_REF_KEY); | ||
566 | } | ||
567 | |||
568 | if (exist && | ||
569 | ((key.type == BTRFS_TREE_BLOCK_REF_KEY && | ||
570 | exist->owner == key.offset) || | ||
571 | (key.type == BTRFS_SHARED_BLOCK_REF_KEY && | ||
572 | exist->bytenr == key.offset))) { | ||
573 | exist = NULL; | ||
574 | goto next; | ||
575 | } | ||
576 | |||
577 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
578 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || | ||
579 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
580 | if (key.objectid == key.offset && | ||
581 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
582 | struct btrfs_extent_ref_v0 *ref0; | ||
583 | ref0 = btrfs_item_ptr(eb, path1->slots[0], | ||
584 | struct btrfs_extent_ref_v0); | ||
585 | root = find_tree_root(rc, eb, ref0); | ||
586 | if (root) | ||
587 | cur->root = root; | ||
588 | else | ||
589 | cur->old_root = 1; | ||
590 | break; | ||
591 | } | ||
592 | #else | ||
593 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | ||
594 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { | ||
595 | #endif | ||
596 | if (key.objectid == key.offset) { | ||
597 | /* | ||
598 | * only root blocks of reloc trees use | ||
599 | * backref of this type. | ||
600 | */ | ||
601 | root = find_reloc_root(rc, cur->bytenr); | ||
602 | BUG_ON(!root); | ||
603 | cur->root = root; | ||
604 | break; | ||
605 | } | ||
606 | |||
607 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | ||
608 | if (!edge) { | ||
609 | err = -ENOMEM; | ||
610 | goto out; | ||
611 | } | ||
612 | rb_node = tree_search(&cache->rb_root, key.offset); | ||
613 | if (!rb_node) { | ||
614 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | ||
615 | if (!upper) { | ||
616 | kfree(edge); | ||
617 | err = -ENOMEM; | ||
618 | goto out; | ||
619 | } | ||
620 | backref_node_init(upper); | ||
621 | upper->bytenr = key.offset; | ||
622 | upper->owner = 0; | ||
623 | upper->level = cur->level + 1; | ||
624 | /* | ||
625 | * backrefs for the upper level block aren't | ||
626 | * cached; add the block to the pending list | ||
627 | */ | ||
628 | list_add_tail(&edge->list[UPPER], &list); | ||
629 | } else { | ||
630 | upper = rb_entry(rb_node, struct backref_node, | ||
631 | rb_node); | ||
632 | INIT_LIST_HEAD(&edge->list[UPPER]); | ||
633 | } | ||
634 | list_add(&edge->list[LOWER], &cur->upper); | ||
635 | edge->node[UPPER] = upper; | ||
636 | edge->node[LOWER] = cur; | ||
637 | |||
638 | goto next; | ||
639 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { | ||
640 | goto next; | ||
641 | } | ||
642 | |||
643 | /* key.type == BTRFS_TREE_BLOCK_REF_KEY */ | ||
644 | root = read_fs_root(rc->extent_root->fs_info, key.offset); | ||
645 | if (IS_ERR(root)) { | ||
646 | err = PTR_ERR(root); | ||
647 | goto out; | ||
648 | } | ||
649 | |||
650 | if (btrfs_root_level(&root->root_item) == cur->level) { | ||
651 | /* tree root */ | ||
652 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | ||
653 | cur->bytenr); | ||
654 | cur->root = root; | ||
655 | break; | ||
656 | } | ||
657 | |||
658 | level = cur->level + 1; | ||
659 | |||
660 | /* | ||
661 | * search the tree to find upper level blocks that | ||
662 | * reference the block. | ||
663 | */ | ||
664 | path2->search_commit_root = 1; | ||
665 | path2->skip_locking = 1; | ||
666 | path2->lowest_level = level; | ||
667 | ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0); | ||
668 | path2->lowest_level = 0; | ||
669 | if (ret < 0) { | ||
670 | err = ret; | ||
671 | goto out; | ||
672 | } | ||
673 | |||
674 | eb = path2->nodes[level]; | ||
675 | WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != | ||
676 | cur->bytenr); | ||
677 | |||
678 | lower = cur; | ||
679 | for (; level < BTRFS_MAX_LEVEL; level++) { | ||
680 | if (!path2->nodes[level]) { | ||
681 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | ||
682 | lower->bytenr); | ||
683 | lower->root = root; | ||
684 | break; | ||
685 | } | ||
686 | |||
687 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | ||
688 | if (!edge) { | ||
689 | err = -ENOMEM; | ||
690 | goto out; | ||
691 | } | ||
692 | |||
693 | eb = path2->nodes[level]; | ||
694 | rb_node = tree_search(&cache->rb_root, eb->start); | ||
695 | if (!rb_node) { | ||
696 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | ||
697 | if (!upper) { | ||
698 | kfree(edge); | ||
699 | err = -ENOMEM; | ||
700 | goto out; | ||
701 | } | ||
702 | backref_node_init(upper); | ||
703 | upper->bytenr = eb->start; | ||
704 | upper->owner = btrfs_header_owner(eb); | ||
705 | upper->level = lower->level + 1; | ||
706 | |||
707 | /* | ||
708 | * if we know the block isn't shared | ||
709 | * we can avoid checking its backrefs. | ||
710 | */ | ||
711 | if (btrfs_block_can_be_shared(root, eb)) | ||
712 | upper->checked = 0; | ||
713 | else | ||
714 | upper->checked = 1; | ||
715 | |||
716 | /* | ||
717 | * add the block to the pending list if we | ||
718 | * need to check its backrefs. only a block | ||
719 | * at 'cur->level + 1' is added to the | ||
720 | * tail of the pending list. this guarantees | ||
721 | * we check backrefs from lower level | ||
722 | * blocks to upper level blocks. | ||
723 | */ | ||
724 | if (!upper->checked && | ||
725 | level == cur->level + 1) { | ||
726 | list_add_tail(&edge->list[UPPER], | ||
727 | &list); | ||
728 | } else | ||
729 | INIT_LIST_HEAD(&edge->list[UPPER]); | ||
730 | } else { | ||
731 | upper = rb_entry(rb_node, struct backref_node, | ||
732 | rb_node); | ||
733 | BUG_ON(!upper->checked); | ||
734 | INIT_LIST_HEAD(&edge->list[UPPER]); | ||
735 | } | ||
736 | list_add_tail(&edge->list[LOWER], &lower->upper); | ||
737 | edge->node[UPPER] = upper; | ||
738 | edge->node[LOWER] = lower; | ||
739 | |||
740 | if (rb_node) | ||
741 | break; | ||
742 | lower = upper; | ||
743 | upper = NULL; | ||
744 | } | ||
745 | btrfs_release_path(root, path2); | ||
746 | next: | ||
747 | if (ptr < end) { | ||
748 | ptr += btrfs_extent_inline_ref_size(key.type); | ||
749 | if (ptr >= end) { | ||
750 | WARN_ON(ptr > end); | ||
751 | ptr = 0; | ||
752 | end = 0; | ||
753 | } | ||
754 | } | ||
755 | if (ptr >= end) | ||
756 | path1->slots[0]++; | ||
757 | } | ||
758 | btrfs_release_path(rc->extent_root, path1); | ||
759 | |||
760 | cur->checked = 1; | ||
761 | WARN_ON(exist); | ||
762 | |||
763 | /* if the pending list isn't empty, take the first block to process */ | ||
764 | if (!list_empty(&list)) { | ||
765 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); | ||
766 | list_del_init(&edge->list[UPPER]); | ||
767 | cur = edge->node[UPPER]; | ||
768 | goto again; | ||
769 | } | ||
770 | |||
771 | /* | ||
772 | * everything went well; connect the backref nodes and insert them | ||
773 | * into the cache. | ||
774 | */ | ||
775 | BUG_ON(!node->checked); | ||
776 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | ||
777 | BUG_ON(rb_node); | ||
778 | |||
779 | list_for_each_entry(edge, &node->upper, list[LOWER]) | ||
780 | list_add_tail(&edge->list[UPPER], &list); | ||
781 | |||
782 | while (!list_empty(&list)) { | ||
783 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); | ||
784 | list_del_init(&edge->list[UPPER]); | ||
785 | upper = edge->node[UPPER]; | ||
786 | |||
787 | if (!RB_EMPTY_NODE(&upper->rb_node)) { | ||
788 | if (upper->lowest) { | ||
789 | list_del_init(&upper->lower); | ||
790 | upper->lowest = 0; | ||
791 | } | ||
792 | |||
793 | list_add_tail(&edge->list[UPPER], &upper->lower); | ||
794 | continue; | ||
795 | } | ||
796 | |||
797 | BUG_ON(!upper->checked); | ||
798 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | ||
799 | &upper->rb_node); | ||
800 | BUG_ON(rb_node); | ||
801 | |||
802 | list_add_tail(&edge->list[UPPER], &upper->lower); | ||
803 | |||
804 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | ||
805 | list_add_tail(&edge->list[UPPER], &list); | ||
806 | } | ||
807 | out: | ||
808 | btrfs_free_path(path1); | ||
809 | btrfs_free_path(path2); | ||
810 | if (err) { | ||
811 | INIT_LIST_HEAD(&list); | ||
812 | upper = node; | ||
813 | while (upper) { | ||
814 | if (RB_EMPTY_NODE(&upper->rb_node)) { | ||
815 | list_splice_tail(&upper->upper, &list); | ||
816 | kfree(upper); | ||
817 | } | ||
818 | |||
819 | if (list_empty(&list)) | ||
820 | break; | ||
821 | |||
822 | edge = list_entry(list.next, struct backref_edge, | ||
823 | list[LOWER]); | ||
824 | upper = edge->node[UPPER]; | ||
825 | kfree(edge); | ||
826 | } | ||
827 | return ERR_PTR(err); | ||
828 | } | ||
829 | return node; | ||
830 | } | ||
831 | |||
832 | /* | ||
833 | * helper to add 'address of tree root -> reloc tree' mapping | ||
834 | */ | ||
835 | static int __add_reloc_root(struct btrfs_root *root) | ||
836 | { | ||
837 | struct rb_node *rb_node; | ||
838 | struct mapping_node *node; | ||
839 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
840 | |||
841 | node = kmalloc(sizeof(*node), GFP_NOFS); | ||
842 | BUG_ON(!node); | ||
843 | |||
844 | node->bytenr = root->node->start; | ||
845 | node->data = root; | ||
846 | |||
847 | spin_lock(&rc->reloc_root_tree.lock); | ||
848 | rb_node = tree_insert(&rc->reloc_root_tree.rb_root, | ||
849 | node->bytenr, &node->rb_node); | ||
850 | spin_unlock(&rc->reloc_root_tree.lock); | ||
851 | BUG_ON(rb_node); | ||
852 | |||
853 | list_add_tail(&root->root_list, &rc->reloc_roots); | ||
854 | return 0; | ||
855 | } | ||
856 | |||
857 | /* | ||
858 | * helper to update/delete the 'address of tree root -> reloc tree' | ||
859 | * mapping | ||
860 | */ | ||
861 | static int __update_reloc_root(struct btrfs_root *root, int del) | ||
862 | { | ||
863 | struct rb_node *rb_node; | ||
864 | struct mapping_node *node = NULL; | ||
865 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
866 | |||
867 | spin_lock(&rc->reloc_root_tree.lock); | ||
868 | rb_node = tree_search(&rc->reloc_root_tree.rb_root, | ||
869 | root->commit_root->start); | ||
870 | if (rb_node) { | ||
871 | node = rb_entry(rb_node, struct mapping_node, rb_node); | ||
872 | rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); | ||
873 | } | ||
874 | spin_unlock(&rc->reloc_root_tree.lock); | ||
875 | |||
876 | BUG_ON((struct btrfs_root *)node->data != root); | ||
877 | |||
878 | if (!del) { | ||
879 | spin_lock(&rc->reloc_root_tree.lock); | ||
880 | node->bytenr = root->node->start; | ||
881 | rb_node = tree_insert(&rc->reloc_root_tree.rb_root, | ||
882 | node->bytenr, &node->rb_node); | ||
883 | spin_unlock(&rc->reloc_root_tree.lock); | ||
884 | BUG_ON(rb_node); | ||
885 | } else { | ||
886 | list_del_init(&root->root_list); | ||
887 | kfree(node); | ||
888 | } | ||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | /* | ||
893 | * create reloc tree for a given fs tree. reloc tree is just a | ||
894 | * snapshot of the fs tree with a special root objectid. | ||
895 | */ | ||
896 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
897 | struct btrfs_root *root) | ||
898 | { | ||
899 | struct btrfs_root *reloc_root; | ||
900 | struct extent_buffer *eb; | ||
901 | struct btrfs_root_item *root_item; | ||
902 | struct btrfs_key root_key; | ||
903 | int ret; | ||
904 | |||
905 | if (root->reloc_root) { | ||
906 | reloc_root = root->reloc_root; | ||
907 | reloc_root->last_trans = trans->transid; | ||
908 | return 0; | ||
909 | } | ||
910 | |||
911 | if (!root->fs_info->reloc_ctl || | ||
912 | !root->fs_info->reloc_ctl->create_reloc_root || | ||
913 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
914 | return 0; | ||
915 | |||
916 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | ||
917 | BUG_ON(!root_item); | ||
918 | |||
919 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | ||
920 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
921 | root_key.offset = root->root_key.objectid; | ||
922 | |||
923 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, | ||
924 | BTRFS_TREE_RELOC_OBJECTID); | ||
925 | BUG_ON(ret); | ||
926 | |||
927 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); | ||
928 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | ||
929 | btrfs_set_root_refs(root_item, 1); | ||
930 | btrfs_set_root_bytenr(root_item, eb->start); | ||
931 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | ||
932 | btrfs_set_root_generation(root_item, trans->transid); | ||
933 | memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); | ||
934 | root_item->drop_level = 0; | ||
935 | |||
936 | btrfs_tree_unlock(eb); | ||
937 | free_extent_buffer(eb); | ||
938 | |||
939 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, | ||
940 | &root_key, root_item); | ||
941 | BUG_ON(ret); | ||
942 | kfree(root_item); | ||
943 | |||
944 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | ||
945 | &root_key); | ||
946 | BUG_ON(IS_ERR(reloc_root)); | ||
947 | reloc_root->last_trans = trans->transid; | ||
948 | |||
949 | __add_reloc_root(reloc_root); | ||
950 | root->reloc_root = reloc_root; | ||
951 | return 0; | ||
952 | } | ||
953 | |||
954 | /* | ||
955 | * update root item of reloc tree | ||
956 | */ | ||
957 | int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | ||
958 | struct btrfs_root *root) | ||
959 | { | ||
960 | struct btrfs_root *reloc_root; | ||
961 | struct btrfs_root_item *root_item; | ||
962 | int del = 0; | ||
963 | int ret; | ||
964 | |||
965 | if (!root->reloc_root) | ||
966 | return 0; | ||
967 | |||
968 | reloc_root = root->reloc_root; | ||
969 | root_item = &reloc_root->root_item; | ||
970 | |||
971 | if (btrfs_root_refs(root_item) == 0) { | ||
972 | root->reloc_root = NULL; | ||
973 | del = 1; | ||
974 | } | ||
975 | |||
976 | __update_reloc_root(reloc_root, del); | ||
977 | |||
978 | if (reloc_root->commit_root != reloc_root->node) { | ||
979 | btrfs_set_root_node(root_item, reloc_root->node); | ||
980 | free_extent_buffer(reloc_root->commit_root); | ||
981 | reloc_root->commit_root = btrfs_root_node(reloc_root); | ||
982 | } | ||
983 | |||
984 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
985 | &reloc_root->root_key, root_item); | ||
986 | BUG_ON(ret); | ||
987 | return 0; | ||
988 | } | ||
989 | |||
990 | /* | ||
991 | * helper to find the first cached inode with inode number >= objectid | ||
992 | * in a subvolume | ||
993 | */ | ||
994 | static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid) | ||
995 | { | ||
996 | struct rb_node *node; | ||
997 | struct rb_node *prev; | ||
998 | struct btrfs_inode *entry; | ||
999 | struct inode *inode; | ||
1000 | |||
1001 | spin_lock(&root->inode_lock); | ||
1002 | again: | ||
1003 | node = root->inode_tree.rb_node; | ||
1004 | prev = NULL; | ||
1005 | while (node) { | ||
1006 | prev = node; | ||
1007 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
1008 | |||
1009 | if (objectid < entry->vfs_inode.i_ino) | ||
1010 | node = node->rb_left; | ||
1011 | else if (objectid > entry->vfs_inode.i_ino) | ||
1012 | node = node->rb_right; | ||
1013 | else | ||
1014 | break; | ||
1015 | } | ||
1016 | if (!node) { | ||
1017 | while (prev) { | ||
1018 | entry = rb_entry(prev, struct btrfs_inode, rb_node); | ||
1019 | if (objectid <= entry->vfs_inode.i_ino) { | ||
1020 | node = prev; | ||
1021 | break; | ||
1022 | } | ||
1023 | prev = rb_next(prev); | ||
1024 | } | ||
1025 | } | ||
1026 | while (node) { | ||
1027 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
1028 | inode = igrab(&entry->vfs_inode); | ||
1029 | if (inode) { | ||
1030 | spin_unlock(&root->inode_lock); | ||
1031 | return inode; | ||
1032 | } | ||
1033 | |||
1034 | objectid = entry->vfs_inode.i_ino + 1; | ||
1035 | if (cond_resched_lock(&root->inode_lock)) | ||
1036 | goto again; | ||
1037 | |||
1038 | node = rb_next(node); | ||
1039 | } | ||
1040 | spin_unlock(&root->inode_lock); | ||
1041 | return NULL; | ||
1042 | } | ||
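
find_next_inode() is a lower-bound search: it descends the rb-tree, and when the exact inode number is absent it walks forward from the last node visited until an entry with i_ino >= objectid appears. A hypothetical sketch of the same lower-bound logic over a sorted array of inode numbers:

    /* first index with ino[i] >= objectid, or -1 if none exists */
    static int next_ino_index(const unsigned long long *ino, int n,
                              unsigned long long objectid)
    {
            int lo = 0, hi = n;               /* half-open [lo, hi) */

            while (lo < hi) {
                    int mid = lo + (hi - lo) / 2;
                    if (ino[mid] < objectid)
                            lo = mid + 1;
                    else
                            hi = mid;
            }
            return lo < n ? lo : -1;
    }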
1043 | |||
1044 | static int in_block_group(u64 bytenr, | ||
1045 | struct btrfs_block_group_cache *block_group) | ||
1046 | { | ||
1047 | if (bytenr >= block_group->key.objectid && | ||
1048 | bytenr < block_group->key.objectid + block_group->key.offset) | ||
1049 | return 1; | ||
1050 | return 0; | ||
1051 | } | ||
1052 | |||
1053 | /* | ||
1054 | * get new location of data | ||
1055 | */ | ||
1056 | static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, | ||
1057 | u64 bytenr, u64 num_bytes) | ||
1058 | { | ||
1059 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | ||
1060 | struct btrfs_path *path; | ||
1061 | struct btrfs_file_extent_item *fi; | ||
1062 | struct extent_buffer *leaf; | ||
1063 | int ret; | ||
1064 | |||
1065 | path = btrfs_alloc_path(); | ||
1066 | if (!path) | ||
1067 | return -ENOMEM; | ||
1068 | |||
1069 | bytenr -= BTRFS_I(reloc_inode)->index_cnt; | ||
1070 | ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, | ||
1071 | bytenr, 0); | ||
1072 | if (ret < 0) | ||
1073 | goto out; | ||
1074 | if (ret > 0) { | ||
1075 | ret = -ENOENT; | ||
1076 | goto out; | ||
1077 | } | ||
1078 | |||
1079 | leaf = path->nodes[0]; | ||
1080 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
1081 | struct btrfs_file_extent_item); | ||
1082 | |||
1083 | BUG_ON(btrfs_file_extent_offset(leaf, fi) || | ||
1084 | btrfs_file_extent_compression(leaf, fi) || | ||
1085 | btrfs_file_extent_encryption(leaf, fi) || | ||
1086 | btrfs_file_extent_other_encoding(leaf, fi)); | ||
1087 | |||
1088 | if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { | ||
1089 | ret = 1; | ||
1090 | goto out; | ||
1091 | } | ||
1092 | |||
1093 | if (new_bytenr) | ||
1094 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
1095 | ret = 0; | ||
1096 | out: | ||
1097 | btrfs_free_path(path); | ||
1098 | return ret; | ||
1099 | } | ||
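
The translation in get_new_location() relies on the reloc data inode being a linear window over the block group: a data extent that used to live at bytenr is described by the file extent item at file offset bytenr - index_cnt, and the disk_bytenr recorded there is the new location. A hypothetical sketch of that translation, with a flat table standing in for the file extent items:

    struct new_extent {
            unsigned long long file_off;      /* old bytenr - index_cnt */
            unsigned long long disk_bytenr;   /* new location on disk */
            unsigned long long num_bytes;
    };

    /* 0: *new_bytenr set; 1: size mismatch; -1: no extent (cf. -ENOENT) */
    static int lookup_new_location(const struct new_extent *tab, int n,
                                   unsigned long long index_cnt,
                                   unsigned long long bytenr,
                                   unsigned long long num_bytes,
                                   unsigned long long *new_bytenr)
    {
            unsigned long long off = bytenr - index_cnt;
            int i;

            for (i = 0; i < n; i++) {
                    if (tab[i].file_off != off)
                            continue;
                    if (tab[i].num_bytes != num_bytes)
                            return 1;
                    *new_bytenr = tab[i].disk_bytenr;
                    return 0;
            }
            return -1;
    }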
1100 | |||
1101 | /* | ||
1102 | * update file extent items in the tree leaf to point to | ||
1103 | * the new locations. | ||
1104 | */ | ||
1105 | static int replace_file_extents(struct btrfs_trans_handle *trans, | ||
1106 | struct reloc_control *rc, | ||
1107 | struct btrfs_root *root, | ||
1108 | struct extent_buffer *leaf, | ||
1109 | struct list_head *inode_list) | ||
1110 | { | ||
1111 | struct btrfs_key key; | ||
1112 | struct btrfs_file_extent_item *fi; | ||
1113 | struct inode *inode = NULL; | ||
1114 | struct inodevec *ivec = NULL; | ||
1115 | u64 parent; | ||
1116 | u64 bytenr; | ||
1117 | u64 new_bytenr; | ||
1118 | u64 num_bytes; | ||
1119 | u64 end; | ||
1120 | u32 nritems; | ||
1121 | u32 i; | ||
1122 | int ret; | ||
1123 | int first = 1; | ||
1124 | int dirty = 0; | ||
1125 | |||
1126 | if (rc->stage != UPDATE_DATA_PTRS) | ||
1127 | return 0; | ||
1128 | |||
1129 | /* reloc trees always use full backref */ | ||
1130 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
1131 | parent = leaf->start; | ||
1132 | else | ||
1133 | parent = 0; | ||
1134 | |||
1135 | nritems = btrfs_header_nritems(leaf); | ||
1136 | for (i = 0; i < nritems; i++) { | ||
1137 | cond_resched(); | ||
1138 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
1139 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
1140 | continue; | ||
1141 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
1142 | if (btrfs_file_extent_type(leaf, fi) == | ||
1143 | BTRFS_FILE_EXTENT_INLINE) | ||
1144 | continue; | ||
1145 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
1146 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
1147 | if (bytenr == 0) | ||
1148 | continue; | ||
1149 | if (!in_block_group(bytenr, rc->block_group)) | ||
1150 | continue; | ||
1151 | |||
1152 | /* | ||
1153 | * if we are modifying a block in the fs tree, wait for readpage | ||
1154 | * to complete and drop the extent cache | ||
1155 | */ | ||
1156 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | ||
1157 | if (!ivec || ivec->nr == INODEVEC_SIZE) { | ||
1158 | ivec = kmalloc(sizeof(*ivec), GFP_NOFS); | ||
1159 | BUG_ON(!ivec); | ||
1160 | ivec->nr = 0; | ||
1161 | list_add_tail(&ivec->list, inode_list); | ||
1162 | } | ||
1163 | if (first) { | ||
1164 | inode = find_next_inode(root, key.objectid); | ||
1165 | if (inode) | ||
1166 | ivec->inode[ivec->nr++] = inode; | ||
1167 | first = 0; | ||
1168 | } else if (inode && inode->i_ino < key.objectid) { | ||
1169 | inode = find_next_inode(root, key.objectid); | ||
1170 | if (inode) | ||
1171 | ivec->inode[ivec->nr++] = inode; | ||
1172 | } | ||
1173 | if (inode && inode->i_ino == key.objectid) { | ||
1174 | end = key.offset + | ||
1175 | btrfs_file_extent_num_bytes(leaf, fi); | ||
1176 | WARN_ON(!IS_ALIGNED(key.offset, | ||
1177 | root->sectorsize)); | ||
1178 | WARN_ON(!IS_ALIGNED(end, root->sectorsize)); | ||
1179 | end--; | ||
1180 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, | ||
1181 | key.offset, end, | ||
1182 | GFP_NOFS); | ||
1183 | if (!ret) | ||
1184 | continue; | ||
1185 | |||
1186 | btrfs_drop_extent_cache(inode, key.offset, end, | ||
1187 | 1); | ||
1188 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
1189 | key.offset, end, GFP_NOFS); | ||
1190 | } | ||
1191 | } | ||
1192 | |||
1193 | ret = get_new_location(rc->data_inode, &new_bytenr, | ||
1194 | bytenr, num_bytes); | ||
1195 | if (ret > 0) | ||
1196 | continue; | ||
1197 | BUG_ON(ret < 0); | ||
1198 | |||
1199 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); | ||
1200 | dirty = 1; | ||
1201 | |||
1202 | key.offset -= btrfs_file_extent_offset(leaf, fi); | ||
1203 | ret = btrfs_inc_extent_ref(trans, root, new_bytenr, | ||
1204 | num_bytes, parent, | ||
1205 | btrfs_header_owner(leaf), | ||
1206 | key.objectid, key.offset); | ||
1207 | BUG_ON(ret); | ||
1208 | |||
1209 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | ||
1210 | parent, btrfs_header_owner(leaf), | ||
1211 | key.objectid, key.offset); | ||
1212 | BUG_ON(ret); | ||
1213 | } | ||
1214 | if (dirty) | ||
1215 | btrfs_mark_buffer_dirty(leaf); | ||
1216 | return 0; | ||
1217 | } | ||
1218 | |||
1219 | static noinline_for_stack | ||
1220 | int memcmp_node_keys(struct extent_buffer *eb, int slot, | ||
1221 | struct btrfs_path *path, int level) | ||
1222 | { | ||
1223 | struct btrfs_disk_key key1; | ||
1224 | struct btrfs_disk_key key2; | ||
1225 | btrfs_node_key(eb, &key1, slot); | ||
1226 | btrfs_node_key(path->nodes[level], &key2, path->slots[level]); | ||
1227 | return memcmp(&key1, &key2, sizeof(key1)); | ||
1228 | } | ||
1229 | |||
1230 | /* | ||
1231 | * try to replace tree blocks in fs tree with the new blocks | ||
1232 | * in reloc tree. tree blocks that haven't been modified since | ||
1233 | * the reloc tree was created can be replaced. | ||
1234 | * | ||
1235 | * if a block was replaced, level of the block + 1 is returned. | ||
1236 | * if no block got replaced, 0 is returned. if there are other | ||
1237 | * errors, a negative error number is returned. | ||
1238 | */ | ||
1239 | static int replace_path(struct btrfs_trans_handle *trans, | ||
1240 | struct btrfs_root *dest, struct btrfs_root *src, | ||
1241 | struct btrfs_path *path, struct btrfs_key *next_key, | ||
1242 | struct extent_buffer **leaf, | ||
1243 | int lowest_level, int max_level) | ||
1244 | { | ||
1245 | struct extent_buffer *eb; | ||
1246 | struct extent_buffer *parent; | ||
1247 | struct btrfs_key key; | ||
1248 | u64 old_bytenr; | ||
1249 | u64 new_bytenr; | ||
1250 | u64 old_ptr_gen; | ||
1251 | u64 new_ptr_gen; | ||
1252 | u64 last_snapshot; | ||
1253 | u32 blocksize; | ||
1254 | int level; | ||
1255 | int ret; | ||
1256 | int slot; | ||
1257 | |||
1258 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | ||
1259 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); | ||
1260 | BUG_ON(lowest_level > 1 && leaf); | ||
1261 | |||
1262 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); | ||
1263 | |||
1264 | slot = path->slots[lowest_level]; | ||
1265 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); | ||
1266 | |||
1267 | eb = btrfs_lock_root_node(dest); | ||
1268 | btrfs_set_lock_blocking(eb); | ||
1269 | level = btrfs_header_level(eb); | ||
1270 | |||
1271 | if (level < lowest_level) { | ||
1272 | btrfs_tree_unlock(eb); | ||
1273 | free_extent_buffer(eb); | ||
1274 | return 0; | ||
1275 | } | ||
1276 | |||
1277 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); | ||
1278 | BUG_ON(ret); | ||
1279 | btrfs_set_lock_blocking(eb); | ||
1280 | |||
1281 | if (next_key) { | ||
1282 | next_key->objectid = (u64)-1; | ||
1283 | next_key->type = (u8)-1; | ||
1284 | next_key->offset = (u64)-1; | ||
1285 | } | ||
1286 | |||
1287 | parent = eb; | ||
1288 | while (1) { | ||
1289 | level = btrfs_header_level(parent); | ||
1290 | BUG_ON(level < lowest_level); | ||
1291 | |||
1292 | ret = btrfs_bin_search(parent, &key, level, &slot); | ||
1293 | if (ret && slot > 0) | ||
1294 | slot--; | ||
1295 | |||
1296 | if (next_key && slot + 1 < btrfs_header_nritems(parent)) | ||
1297 | btrfs_node_key_to_cpu(parent, next_key, slot + 1); | ||
1298 | |||
1299 | old_bytenr = btrfs_node_blockptr(parent, slot); | ||
1300 | blocksize = btrfs_level_size(dest, level - 1); | ||
1301 | old_ptr_gen = btrfs_node_ptr_generation(parent, slot); | ||
1302 | |||
1303 | if (level <= max_level) { | ||
1304 | eb = path->nodes[level]; | ||
1305 | new_bytenr = btrfs_node_blockptr(eb, | ||
1306 | path->slots[level]); | ||
1307 | new_ptr_gen = btrfs_node_ptr_generation(eb, | ||
1308 | path->slots[level]); | ||
1309 | } else { | ||
1310 | new_bytenr = 0; | ||
1311 | new_ptr_gen = 0; | ||
1312 | } | ||
1313 | |||
1314 | if (new_bytenr > 0 && new_bytenr == old_bytenr) { | ||
1315 | WARN_ON(1); | ||
1316 | ret = level; | ||
1317 | break; | ||
1318 | } | ||
1319 | |||
1320 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || | ||
1321 | memcmp_node_keys(parent, slot, path, level)) { | ||
1322 | if (level <= lowest_level && !leaf) { | ||
1323 | ret = 0; | ||
1324 | break; | ||
1325 | } | ||
1326 | |||
1327 | eb = read_tree_block(dest, old_bytenr, blocksize, | ||
1328 | old_ptr_gen); | ||
1329 | btrfs_tree_lock(eb); | ||
1330 | ret = btrfs_cow_block(trans, dest, eb, parent, | ||
1331 | slot, &eb); | ||
1332 | BUG_ON(ret); | ||
1333 | btrfs_set_lock_blocking(eb); | ||
1334 | |||
1335 | if (level <= lowest_level) { | ||
1336 | *leaf = eb; | ||
1337 | ret = 0; | ||
1338 | break; | ||
1339 | } | ||
1340 | |||
1341 | btrfs_tree_unlock(parent); | ||
1342 | free_extent_buffer(parent); | ||
1343 | |||
1344 | parent = eb; | ||
1345 | continue; | ||
1346 | } | ||
1347 | |||
1348 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
1349 | path->slots[level]); | ||
1350 | btrfs_release_path(src, path); | ||
1351 | |||
1352 | path->lowest_level = level; | ||
1353 | ret = btrfs_search_slot(trans, src, &key, path, 0, 1); | ||
1354 | path->lowest_level = 0; | ||
1355 | BUG_ON(ret); | ||
1356 | |||
1357 | /* | ||
1358 | * swap blocks in fs tree and reloc tree. | ||
1359 | */ | ||
1360 | btrfs_set_node_blockptr(parent, slot, new_bytenr); | ||
1361 | btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen); | ||
1362 | btrfs_mark_buffer_dirty(parent); | ||
1363 | |||
1364 | btrfs_set_node_blockptr(path->nodes[level], | ||
1365 | path->slots[level], old_bytenr); | ||
1366 | btrfs_set_node_ptr_generation(path->nodes[level], | ||
1367 | path->slots[level], old_ptr_gen); | ||
1368 | btrfs_mark_buffer_dirty(path->nodes[level]); | ||
1369 | |||
1370 | ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, | ||
1371 | path->nodes[level]->start, | ||
1372 | src->root_key.objectid, level - 1, 0); | ||
1373 | BUG_ON(ret); | ||
1374 | ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, | ||
1375 | 0, dest->root_key.objectid, level - 1, | ||
1376 | 0); | ||
1377 | BUG_ON(ret); | ||
1378 | |||
1379 | ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, | ||
1380 | path->nodes[level]->start, | ||
1381 | src->root_key.objectid, level - 1, 0); | ||
1382 | BUG_ON(ret); | ||
1383 | |||
1384 | ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, | ||
1385 | 0, dest->root_key.objectid, level - 1, | ||
1386 | 0); | ||
1387 | BUG_ON(ret); | ||
1388 | |||
1389 | btrfs_unlock_up_safe(path, 0); | ||
1390 | |||
1391 | ret = level; | ||
1392 | break; | ||
1393 | } | ||
1394 | btrfs_tree_unlock(parent); | ||
1395 | free_extent_buffer(parent); | ||
1396 | return ret; | ||
1397 | } | ||
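
The core of replace_path() is the slot swap: the fs tree parent and the reloc tree parent each hold a (blockptr, generation) pair for the subtree being exchanged, and swapping those two pairs is what actually moves the relocated blocks into the fs tree; the extent reference juggling that follows only keeps the extent tree consistent with the swap. A trivial sketch of the exchange, with hypothetical types:

    struct slot_ptr {
            unsigned long long blockptr;      /* child block address */
            unsigned long long generation;    /* child ptr generation */
    };

    /* exchange one child slot between the fs tree and the reloc tree */
    static void swap_child_slots(struct slot_ptr *fs_slot,
                                 struct slot_ptr *reloc_slot)
    {
            struct slot_ptr tmp = *fs_slot;
            *fs_slot = *reloc_slot;
            *reloc_slot = tmp;
    }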
1398 | |||
1399 | /* | ||
1400 | * helper to find next relocated block in reloc tree | ||
1401 | */ | ||
1402 | static noinline_for_stack | ||
1403 | int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, | ||
1404 | int *level) | ||
1405 | { | ||
1406 | struct extent_buffer *eb; | ||
1407 | int i; | ||
1408 | u64 last_snapshot; | ||
1409 | u32 nritems; | ||
1410 | |||
1411 | last_snapshot = btrfs_root_last_snapshot(&root->root_item); | ||
1412 | |||
1413 | for (i = 0; i < *level; i++) { | ||
1414 | free_extent_buffer(path->nodes[i]); | ||
1415 | path->nodes[i] = NULL; | ||
1416 | } | ||
1417 | |||
1418 | for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { | ||
1419 | eb = path->nodes[i]; | ||
1420 | nritems = btrfs_header_nritems(eb); | ||
1421 | while (path->slots[i] + 1 < nritems) { | ||
1422 | path->slots[i]++; | ||
1423 | if (btrfs_node_ptr_generation(eb, path->slots[i]) <= | ||
1424 | last_snapshot) | ||
1425 | continue; | ||
1426 | |||
1427 | *level = i; | ||
1428 | return 0; | ||
1429 | } | ||
1430 | free_extent_buffer(path->nodes[i]); | ||
1431 | path->nodes[i] = NULL; | ||
1432 | } | ||
1433 | return 1; | ||
1434 | } | ||
1435 | |||
1436 | /* | ||
1437 | * walk down reloc tree to find relocated block of lowest level | ||
1438 | */ | ||
1439 | static noinline_for_stack | ||
1440 | int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, | ||
1441 | int *level) | ||
1442 | { | ||
1443 | struct extent_buffer *eb = NULL; | ||
1444 | int i; | ||
1445 | u64 bytenr; | ||
1446 | u64 ptr_gen = 0; | ||
1447 | u64 last_snapshot; | ||
1448 | u32 blocksize; | ||
1449 | u32 nritems; | ||
1450 | |||
1451 | last_snapshot = btrfs_root_last_snapshot(&root->root_item); | ||
1452 | |||
1453 | for (i = *level; i > 0; i--) { | ||
1454 | eb = path->nodes[i]; | ||
1455 | nritems = btrfs_header_nritems(eb); | ||
1456 | while (path->slots[i] < nritems) { | ||
1457 | ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]); | ||
1458 | if (ptr_gen > last_snapshot) | ||
1459 | break; | ||
1460 | path->slots[i]++; | ||
1461 | } | ||
1462 | if (path->slots[i] >= nritems) { | ||
1463 | if (i == *level) | ||
1464 | break; | ||
1465 | *level = i + 1; | ||
1466 | return 0; | ||
1467 | } | ||
1468 | if (i == 1) { | ||
1469 | *level = i; | ||
1470 | return 0; | ||
1471 | } | ||
1472 | |||
1473 | bytenr = btrfs_node_blockptr(eb, path->slots[i]); | ||
1474 | blocksize = btrfs_level_size(root, i - 1); | ||
1475 | eb = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
1476 | BUG_ON(btrfs_header_level(eb) != i - 1); | ||
1477 | path->nodes[i - 1] = eb; | ||
1478 | path->slots[i - 1] = 0; | ||
1479 | } | ||
1480 | return 1; | ||
1481 | } | ||
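
Both walkers visit only pointers whose generation is newer than last_snapshot, i.e. blocks that were COWed into the reloc tree after it was created; everything older is still shared with the fs tree and needs no merging. A minimal sketch of that filter over an in-memory n-ary tree (toy types, not the kernel structures):

    struct toy_node {
            unsigned long long gen;           /* ptr generation from parent */
            int nr;                           /* number of children */
            struct toy_node **child;
    };

    /* visit only subtrees modified after the reloc tree was created */
    static void visit_new_blocks(struct toy_node *node,
                                 unsigned long long last_snapshot,
                                 void (*fn)(struct toy_node *))
    {
            int i;

            fn(node);
            for (i = 0; i < node->nr; i++) {
                    if (node->child[i]->gen <= last_snapshot)
                            continue;         /* unchanged: skip subtree */
                    visit_new_blocks(node->child[i], last_snapshot, fn);
            }
    }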
1482 | |||
1483 | /* | ||
1484 | * invalidate extent cache for file extents whose key is in the range | ||
1485 | * [min_key, max_key) | ||
1486 | */ | ||
1487 | static int invalidate_extent_cache(struct btrfs_root *root, | ||
1488 | struct btrfs_key *min_key, | ||
1489 | struct btrfs_key *max_key) | ||
1490 | { | ||
1491 | struct inode *inode = NULL; | ||
1492 | u64 objectid; | ||
1493 | u64 start, end; | ||
1494 | |||
1495 | objectid = min_key->objectid; | ||
1496 | while (1) { | ||
1497 | cond_resched(); | ||
1498 | iput(inode); | ||
1499 | |||
1500 | if (objectid > max_key->objectid) | ||
1501 | break; | ||
1502 | |||
1503 | inode = find_next_inode(root, objectid); | ||
1504 | if (!inode) | ||
1505 | break; | ||
1506 | |||
1507 | if (inode->i_ino > max_key->objectid) { | ||
1508 | iput(inode); | ||
1509 | break; | ||
1510 | } | ||
1511 | |||
1512 | objectid = inode->i_ino + 1; | ||
1513 | if (!S_ISREG(inode->i_mode)) | ||
1514 | continue; | ||
1515 | |||
1516 | if (unlikely(min_key->objectid == inode->i_ino)) { | ||
1517 | if (min_key->type > BTRFS_EXTENT_DATA_KEY) | ||
1518 | continue; | ||
1519 | if (min_key->type < BTRFS_EXTENT_DATA_KEY) | ||
1520 | start = 0; | ||
1521 | else { | ||
1522 | start = min_key->offset; | ||
1523 | WARN_ON(!IS_ALIGNED(start, root->sectorsize)); | ||
1524 | } | ||
1525 | } else { | ||
1526 | start = 0; | ||
1527 | } | ||
1528 | |||
1529 | if (unlikely(max_key->objectid == inode->i_ino)) { | ||
1530 | if (max_key->type < BTRFS_EXTENT_DATA_KEY) | ||
1531 | continue; | ||
1532 | if (max_key->type > BTRFS_EXTENT_DATA_KEY) { | ||
1533 | end = (u64)-1; | ||
1534 | } else { | ||
1535 | if (max_key->offset == 0) | ||
1536 | continue; | ||
1537 | end = max_key->offset; | ||
1538 | WARN_ON(!IS_ALIGNED(end, root->sectorsize)); | ||
1539 | end--; | ||
1540 | } | ||
1541 | } else { | ||
1542 | end = (u64)-1; | ||
1543 | } | ||
1544 | |||
1545 | /* the lock_extent waits for readpage to complete */ | ||
1546 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
1547 | btrfs_drop_extent_cache(inode, start, end, 1); | ||
1548 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
1549 | } | ||
1550 | return 0; | ||
1551 | } | ||
1552 | |||
1553 | static int find_next_key(struct btrfs_path *path, int level, | ||
1554 | struct btrfs_key *key) | ||
1555 | |||
1556 | { | ||
1557 | while (level < BTRFS_MAX_LEVEL) { | ||
1558 | if (!path->nodes[level]) | ||
1559 | break; | ||
1560 | if (path->slots[level] + 1 < | ||
1561 | btrfs_header_nritems(path->nodes[level])) { | ||
1562 | btrfs_node_key_to_cpu(path->nodes[level], key, | ||
1563 | path->slots[level] + 1); | ||
1564 | return 0; | ||
1565 | } | ||
1566 | level++; | ||
1567 | } | ||
1568 | return 1; | ||
1569 | } | ||
1570 | |||
1571 | /* | ||
1572 | * merge the relocated tree blocks in reloc tree with corresponding | ||
1573 | * fs tree. | ||
1574 | */ | ||
1575 | static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | ||
1576 | struct btrfs_root *root) | ||
1577 | { | ||
1578 | LIST_HEAD(inode_list); | ||
1579 | struct btrfs_key key; | ||
1580 | struct btrfs_key next_key; | ||
1581 | struct btrfs_trans_handle *trans; | ||
1582 | struct btrfs_root *reloc_root; | ||
1583 | struct btrfs_root_item *root_item; | ||
1584 | struct btrfs_path *path; | ||
1585 | struct extent_buffer *leaf = NULL; | ||
1586 | unsigned long nr; | ||
1587 | int level; | ||
1588 | int max_level; | ||
1589 | int replaced = 0; | ||
1590 | int ret; | ||
1591 | int err = 0; | ||
1592 | |||
1593 | path = btrfs_alloc_path(); | ||
1594 | if (!path) | ||
1595 | return -ENOMEM; | ||
1596 | |||
1597 | reloc_root = root->reloc_root; | ||
1598 | root_item = &reloc_root->root_item; | ||
1599 | |||
1600 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | ||
1601 | level = btrfs_root_level(root_item); | ||
1602 | extent_buffer_get(reloc_root->node); | ||
1603 | path->nodes[level] = reloc_root->node; | ||
1604 | path->slots[level] = 0; | ||
1605 | } else { | ||
1606 | btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); | ||
1607 | |||
1608 | level = root_item->drop_level; | ||
1609 | BUG_ON(level == 0); | ||
1610 | path->lowest_level = level; | ||
1611 | ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); | ||
1612 | if (ret < 0) { | ||
1613 | btrfs_free_path(path); | ||
1614 | return ret; | ||
1615 | } | ||
1616 | |||
1617 | btrfs_node_key_to_cpu(path->nodes[level], &next_key, | ||
1618 | path->slots[level]); | ||
1619 | WARN_ON(memcmp(&key, &next_key, sizeof(key))); | ||
1620 | |||
1621 | btrfs_unlock_up_safe(path, 0); | ||
1622 | } | ||
1623 | |||
1624 | if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { | ||
1625 | trans = btrfs_start_transaction(root, 1); | ||
1626 | |||
1627 | leaf = path->nodes[0]; | ||
1628 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
1629 | btrfs_release_path(reloc_root, path); | ||
1630 | |||
1631 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
1632 | if (ret < 0) { | ||
1633 | err = ret; | ||
1634 | goto out; | ||
1635 | } | ||
1636 | |||
1637 | leaf = path->nodes[0]; | ||
1638 | btrfs_unlock_up_safe(path, 1); | ||
1639 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1640 | &inode_list); | ||
1641 | if (ret < 0) | ||
1642 | err = ret; | ||
1643 | goto out; | ||
1644 | } | ||
1645 | |||
1646 | memset(&next_key, 0, sizeof(next_key)); | ||
1647 | |||
1648 | while (1) { | ||
1649 | leaf = NULL; | ||
1650 | replaced = 0; | ||
1651 | trans = btrfs_start_transaction(root, 1); | ||
1652 | max_level = level; | ||
1653 | |||
1654 | ret = walk_down_reloc_tree(reloc_root, path, &level); | ||
1655 | if (ret < 0) { | ||
1656 | err = ret; | ||
1657 | goto out; | ||
1658 | } | ||
1659 | if (ret > 0) | ||
1660 | break; | ||
1661 | |||
1662 | if (!find_next_key(path, level, &key) && | ||
1663 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { | ||
1664 | ret = 0; | ||
1665 | } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { | ||
1666 | ret = replace_path(trans, root, reloc_root, | ||
1667 | path, &next_key, &leaf, | ||
1668 | level, max_level); | ||
1669 | } else { | ||
1670 | ret = replace_path(trans, root, reloc_root, | ||
1671 | path, &next_key, NULL, | ||
1672 | level, max_level); | ||
1673 | } | ||
1674 | if (ret < 0) { | ||
1675 | err = ret; | ||
1676 | goto out; | ||
1677 | } | ||
1678 | |||
1679 | if (ret > 0) { | ||
1680 | level = ret; | ||
1681 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
1682 | path->slots[level]); | ||
1683 | replaced = 1; | ||
1684 | } else if (leaf) { | ||
1685 | /* | ||
1686 | * no block got replaced, try replacing file extents | ||
1687 | */ | ||
1688 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
1689 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1690 | &inode_list); | ||
1691 | btrfs_tree_unlock(leaf); | ||
1692 | free_extent_buffer(leaf); | ||
1693 | BUG_ON(ret < 0); | ||
1694 | } | ||
1695 | |||
1696 | ret = walk_up_reloc_tree(reloc_root, path, &level); | ||
1697 | if (ret > 0) | ||
1698 | break; | ||
1699 | |||
1700 | BUG_ON(level == 0); | ||
1701 | /* | ||
1702 | * save the merging progress in the drop_progress. | ||
1703 | * this is OK since root refs == 1 in this case. | ||
1704 | */ | ||
1705 | btrfs_node_key(path->nodes[level], &root_item->drop_progress, | ||
1706 | path->slots[level]); | ||
1707 | root_item->drop_level = level; | ||
1708 | |||
1709 | nr = trans->blocks_used; | ||
1710 | btrfs_end_transaction(trans, root); | ||
1711 | |||
1712 | btrfs_btree_balance_dirty(root, nr); | ||
1713 | |||
1714 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | ||
1715 | invalidate_extent_cache(root, &key, &next_key); | ||
1716 | } | ||
1717 | |||
1718 | /* | ||
1719 | * handle the case where only one block in the fs tree needs to | ||
1720 | * be relocated and the block is the tree root. | ||
1721 | */ | ||
1722 | leaf = btrfs_lock_root_node(root); | ||
1723 | ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf); | ||
1724 | btrfs_tree_unlock(leaf); | ||
1725 | free_extent_buffer(leaf); | ||
1726 | if (ret < 0) | ||
1727 | err = ret; | ||
1728 | out: | ||
1729 | btrfs_free_path(path); | ||
1730 | |||
1731 | if (err == 0) { | ||
1732 | memset(&root_item->drop_progress, 0, | ||
1733 | sizeof(root_item->drop_progress)); | ||
1734 | root_item->drop_level = 0; | ||
1735 | btrfs_set_root_refs(root_item, 0); | ||
1736 | } | ||
1737 | |||
1738 | nr = trans->blocks_used; | ||
1739 | btrfs_end_transaction(trans, root); | ||
1740 | |||
1741 | btrfs_btree_balance_dirty(root, nr); | ||
1742 | |||
1743 | /* | ||
1744 | * put inodes while we aren't holding the tree locks | ||
1745 | */ | ||
1746 | while (!list_empty(&inode_list)) { | ||
1747 | struct inodevec *ivec; | ||
1748 | ivec = list_entry(inode_list.next, struct inodevec, list); | ||
1749 | list_del(&ivec->list); | ||
1750 | while (ivec->nr > 0) { | ||
1751 | ivec->nr--; | ||
1752 | iput(ivec->inode[ivec->nr]); | ||
1753 | } | ||
1754 | kfree(ivec); | ||
1755 | } | ||
1756 | |||
1757 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | ||
1758 | invalidate_extent_cache(root, &key, &next_key); | ||
1759 | |||
1760 | return err; | ||
1761 | } | ||
1762 | |||
1763 | /* | ||
1764 | * callback for the work threads. | ||
1765 | * this function merges reloc tree with corresponding fs tree, | ||
1766 | * and then drops the reloc tree. | ||
1767 | */ | ||
1768 | static void merge_func(struct btrfs_work *work) | ||
1769 | { | ||
1770 | struct btrfs_trans_handle *trans; | ||
1771 | struct btrfs_root *root; | ||
1772 | struct btrfs_root *reloc_root; | ||
1773 | struct async_merge *async; | ||
1774 | |||
1775 | async = container_of(work, struct async_merge, work); | ||
1776 | reloc_root = async->root; | ||
1777 | |||
1778 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { | ||
1779 | root = read_fs_root(reloc_root->fs_info, | ||
1780 | reloc_root->root_key.offset); | ||
1781 | BUG_ON(IS_ERR(root)); | ||
1782 | BUG_ON(root->reloc_root != reloc_root); | ||
1783 | |||
1784 | merge_reloc_root(async->rc, root); | ||
1785 | |||
1786 | trans = btrfs_start_transaction(root, 1); | ||
1787 | btrfs_update_reloc_root(trans, root); | ||
1788 | btrfs_end_transaction(trans, root); | ||
1789 | } | ||
1790 | |||
1791 | btrfs_drop_dead_root(reloc_root); | ||
1792 | |||
1793 | if (atomic_dec_and_test(async->num_pending)) | ||
1794 | complete(async->done); | ||
1795 | |||
1796 | kfree(async); | ||
1797 | } | ||
1798 | |||
1799 | static int merge_reloc_roots(struct reloc_control *rc) | ||
1800 | { | ||
1801 | struct async_merge *async; | ||
1802 | struct btrfs_root *root; | ||
1803 | struct completion done; | ||
1804 | atomic_t num_pending; | ||
1805 | |||
1806 | init_completion(&done); | ||
1807 | atomic_set(&num_pending, 1); | ||
1808 | |||
1809 | while (!list_empty(&rc->reloc_roots)) { | ||
1810 | root = list_entry(rc->reloc_roots.next, | ||
1811 | struct btrfs_root, root_list); | ||
1812 | list_del_init(&root->root_list); | ||
1813 | |||
1814 | async = kmalloc(sizeof(*async), GFP_NOFS); | ||
1815 | BUG_ON(!async); | ||
1816 | async->work.func = merge_func; | ||
1817 | async->work.flags = 0; | ||
1818 | async->rc = rc; | ||
1819 | async->root = root; | ||
1820 | async->done = &done; | ||
1821 | async->num_pending = &num_pending; | ||
1822 | atomic_inc(&num_pending); | ||
1823 | btrfs_queue_worker(&rc->workers, &async->work); | ||
1824 | } | ||
1825 | |||
1826 | if (!atomic_dec_and_test(&num_pending)) | ||
1827 | wait_for_completion(&done); | ||
1828 | |||
1829 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | ||
1830 | return 0; | ||
1831 | } | ||
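
merge_reloc_roots() fans one async_merge job per reloc root out to the worker threads, then waits using an atomic counter paired with a completion; num_pending starts at 1 so the completion cannot fire while jobs are still being queued. A hypothetical userspace analogue of that pattern with POSIX threads and C11 atomics:

    #include <pthread.h>
    #include <stdatomic.h>

    struct done_ctl {
            atomic_int num_pending;           /* starts at 1: submitter ref */
            pthread_mutex_t lock;
            pthread_cond_t cond;
            int done;
    };

    /* called by each worker when its job finishes */
    static void put_pending(struct done_ctl *c)
    {
            if (atomic_fetch_sub(&c->num_pending, 1) == 1) {
                    pthread_mutex_lock(&c->lock);
                    c->done = 1;
                    pthread_cond_signal(&c->cond);
                    pthread_mutex_unlock(&c->lock);
            }
    }

    /* called by the submitter after all jobs are queued */
    static void wait_pending(struct done_ctl *c)
    {
            if (atomic_fetch_sub(&c->num_pending, 1) == 1)
                    return;                   /* all workers already done */
            pthread_mutex_lock(&c->lock);
            while (!c->done)
                    pthread_cond_wait(&c->cond, &c->lock);
            pthread_mutex_unlock(&c->lock);
    }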
1832 | |||
1833 | static void free_block_list(struct rb_root *blocks) | ||
1834 | { | ||
1835 | struct tree_block *block; | ||
1836 | struct rb_node *rb_node; | ||
1837 | while ((rb_node = rb_first(blocks))) { | ||
1838 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
1839 | rb_erase(rb_node, blocks); | ||
1840 | kfree(block); | ||
1841 | } | ||
1842 | } | ||
1843 | |||
1844 | static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, | ||
1845 | struct btrfs_root *reloc_root) | ||
1846 | { | ||
1847 | struct btrfs_root *root; | ||
1848 | |||
1849 | if (reloc_root->last_trans == trans->transid) | ||
1850 | return 0; | ||
1851 | |||
1852 | root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset); | ||
1853 | BUG_ON(IS_ERR(root)); | ||
1854 | BUG_ON(root->reloc_root != reloc_root); | ||
1855 | |||
1856 | return btrfs_record_root_in_trans(trans, root); | ||
1857 | } | ||
1858 | |||
1859 | /* | ||
1860 | * select one tree from the trees that reference the block. | ||
1861 | * for blocks in reference counted trees, we prefer the reloc tree. | ||
1862 | * if no reloc tree is found and reloc_only is true, NULL is returned. | ||
1863 | */ | ||
1864 | static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, | ||
1865 | struct backref_node *node, | ||
1866 | struct backref_edge *edges[], | ||
1867 | int *nr, int reloc_only) | ||
1868 | { | ||
1869 | struct backref_node *next; | ||
1870 | struct btrfs_root *root; | ||
1871 | int index; | ||
1872 | int loop = 0; | ||
1873 | again: | ||
1874 | index = 0; | ||
1875 | next = node; | ||
1876 | while (1) { | ||
1877 | cond_resched(); | ||
1878 | next = walk_up_backref(next, edges, &index); | ||
1879 | root = next->root; | ||
1880 | if (!root) { | ||
1881 | BUG_ON(!node->old_root); | ||
1882 | goto skip; | ||
1883 | } | ||
1884 | |||
1885 | /* no other choice for non-reference counted tree */ | ||
1886 | if (!root->ref_cows) { | ||
1887 | BUG_ON(reloc_only); | ||
1888 | break; | ||
1889 | } | ||
1890 | |||
1891 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
1892 | record_reloc_root_in_trans(trans, root); | ||
1893 | break; | ||
1894 | } | ||
1895 | |||
1896 | if (loop) { | ||
1897 | btrfs_record_root_in_trans(trans, root); | ||
1898 | break; | ||
1899 | } | ||
1900 | |||
1901 | if (reloc_only || next != node) { | ||
1902 | if (!root->reloc_root) | ||
1903 | btrfs_record_root_in_trans(trans, root); | ||
1904 | root = root->reloc_root; | ||
1905 | /* | ||
1906 | * if the reloc tree was created in the current | ||
1907 | * transaction, there is no node in the backref tree | ||
1908 | * that corresponds to the root of the reloc tree. | ||
1909 | */ | ||
1910 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
1911 | trans->transid - 1) | ||
1912 | break; | ||
1913 | } | ||
1914 | skip: | ||
1915 | root = NULL; | ||
1916 | next = walk_down_backref(edges, &index); | ||
1917 | if (!next || next->level <= node->level) | ||
1918 | break; | ||
1919 | } | ||
1920 | |||
1921 | if (!root && !loop && !reloc_only) { | ||
1922 | loop = 1; | ||
1923 | goto again; | ||
1924 | } | ||
1925 | |||
1926 | if (root) | ||
1927 | *nr = index; | ||
1928 | else | ||
1929 | *nr = 0; | ||
1930 | |||
1931 | return root; | ||
1932 | } | ||
1933 | |||
1934 | static noinline_for_stack | ||
1935 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | ||
1936 | struct backref_node *node) | ||
1937 | { | ||
1938 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
1939 | int nr; | ||
1940 | return __select_one_root(trans, node, edges, &nr, 0); | ||
1941 | } | ||
1942 | |||
1943 | static noinline_for_stack | ||
1944 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | ||
1945 | struct backref_node *node, | ||
1946 | struct backref_edge *edges[], int *nr) | ||
1947 | { | ||
1948 | return __select_one_root(trans, node, edges, nr, 1); | ||
1949 | } | ||
1950 | |||
1951 | static void grab_path_buffers(struct btrfs_path *path, | ||
1952 | struct backref_node *node, | ||
1953 | struct backref_edge *edges[], int nr) | ||
1954 | { | ||
1955 | int i = 0; | ||
1956 | while (1) { | ||
1957 | drop_node_buffer(node); | ||
1958 | node->eb = path->nodes[node->level]; | ||
1959 | BUG_ON(!node->eb); | ||
1960 | if (path->locks[node->level]) | ||
1961 | node->locked = 1; | ||
1962 | path->nodes[node->level] = NULL; | ||
1963 | path->locks[node->level] = 0; | ||
1964 | |||
1965 | if (i >= nr) | ||
1966 | break; | ||
1967 | |||
1968 | edges[i]->blockptr = node->eb->start; | ||
1969 | node = edges[i]->node[UPPER]; | ||
1970 | i++; | ||
1971 | } | ||
1972 | } | ||
1973 | |||
1974 | /* | ||
1975 | * relocate a tree block, and then update pointers in upper level | ||
1976 | * blocks that reference the block to point to the new location. | ||
1977 | * | ||
1978 | * if called by link_to_upper, the block has already been relocated. | ||
1979 | * in that case this function just updates pointers. | ||
1980 | */ | ||
1981 | static int do_relocation(struct btrfs_trans_handle *trans, | ||
1982 | struct backref_node *node, | ||
1983 | struct btrfs_key *key, | ||
1984 | struct btrfs_path *path, int lowest) | ||
1985 | { | ||
1986 | struct backref_node *upper; | ||
1987 | struct backref_edge *edge; | ||
1988 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
1989 | struct btrfs_root *root; | ||
1990 | struct extent_buffer *eb; | ||
1991 | u32 blocksize; | ||
1992 | u64 bytenr; | ||
1993 | u64 generation; | ||
1994 | int nr; | ||
1995 | int slot; | ||
1996 | int ret; | ||
1997 | int err = 0; | ||
1998 | |||
1999 | BUG_ON(lowest && node->eb); | ||
2000 | |||
2001 | path->lowest_level = node->level + 1; | ||
2002 | list_for_each_entry(edge, &node->upper, list[LOWER]) { | ||
2003 | cond_resched(); | ||
2004 | if (node->eb && node->eb->start == edge->blockptr) | ||
2005 | continue; | ||
2006 | |||
2007 | upper = edge->node[UPPER]; | ||
2008 | root = select_reloc_root(trans, upper, edges, &nr); | ||
2009 | if (!root) | ||
2010 | continue; | ||
2011 | |||
2012 | if (upper->eb && !upper->locked) | ||
2013 | drop_node_buffer(upper); | ||
2014 | |||
2015 | if (!upper->eb) { | ||
2016 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2017 | if (ret < 0) { | ||
2018 | err = ret; | ||
2019 | break; | ||
2020 | } | ||
2021 | BUG_ON(ret > 0); | ||
2022 | |||
2023 | slot = path->slots[upper->level]; | ||
2024 | |||
2025 | btrfs_unlock_up_safe(path, upper->level + 1); | ||
2026 | grab_path_buffers(path, upper, edges, nr); | ||
2027 | |||
2028 | btrfs_release_path(NULL, path); | ||
2029 | } else { | ||
2030 | ret = btrfs_bin_search(upper->eb, key, upper->level, | ||
2031 | &slot); | ||
2032 | BUG_ON(ret); | ||
2033 | } | ||
2034 | |||
2035 | bytenr = btrfs_node_blockptr(upper->eb, slot); | ||
2036 | if (!lowest) { | ||
2037 | if (node->eb->start == bytenr) { | ||
2038 | btrfs_tree_unlock(upper->eb); | ||
2039 | upper->locked = 0; | ||
2040 | continue; | ||
2041 | } | ||
2042 | } else { | ||
2043 | BUG_ON(node->bytenr != bytenr); | ||
2044 | } | ||
2045 | |||
2046 | blocksize = btrfs_level_size(root, node->level); | ||
2047 | generation = btrfs_node_ptr_generation(upper->eb, slot); | ||
2048 | eb = read_tree_block(root, bytenr, blocksize, generation); | ||
2049 | btrfs_tree_lock(eb); | ||
2050 | btrfs_set_lock_blocking(eb); | ||
2051 | |||
2052 | if (!node->eb) { | ||
2053 | ret = btrfs_cow_block(trans, root, eb, upper->eb, | ||
2054 | slot, &eb); | ||
2055 | if (ret < 0) { | ||
2056 | err = ret; | ||
2057 | break; | ||
2058 | } | ||
2059 | btrfs_set_lock_blocking(eb); | ||
2060 | node->eb = eb; | ||
2061 | node->locked = 1; | ||
2062 | } else { | ||
2063 | btrfs_set_node_blockptr(upper->eb, slot, | ||
2064 | node->eb->start); | ||
2065 | btrfs_set_node_ptr_generation(upper->eb, slot, | ||
2066 | trans->transid); | ||
2067 | btrfs_mark_buffer_dirty(upper->eb); | ||
2068 | |||
2069 | ret = btrfs_inc_extent_ref(trans, root, | ||
2070 | node->eb->start, blocksize, | ||
2071 | upper->eb->start, | ||
2072 | btrfs_header_owner(upper->eb), | ||
2073 | node->level, 0); | ||
2074 | BUG_ON(ret); | ||
2075 | |||
2076 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); | ||
2077 | BUG_ON(ret); | ||
2078 | |||
2079 | btrfs_tree_unlock(eb); | ||
2080 | free_extent_buffer(eb); | ||
2081 | } | ||
2082 | if (!lowest) { | ||
2083 | btrfs_tree_unlock(upper->eb); | ||
2084 | upper->locked = 0; | ||
2085 | } | ||
2086 | } | ||
2087 | path->lowest_level = 0; | ||
2088 | return err; | ||
2089 | } | ||
2090 | |||
2091 | static int link_to_upper(struct btrfs_trans_handle *trans, | ||
2092 | struct backref_node *node, | ||
2093 | struct btrfs_path *path) | ||
2094 | { | ||
2095 | struct btrfs_key key; | ||
2096 | if (!node->eb || list_empty(&node->upper)) | ||
2097 | return 0; | ||
2098 | |||
2099 | btrfs_node_key_to_cpu(node->eb, &key, 0); | ||
2100 | return do_relocation(trans, node, &key, path, 0); | ||
2101 | } | ||
2102 | |||
2103 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, | ||
2104 | struct backref_cache *cache, | ||
2105 | struct btrfs_path *path) | ||
2106 | { | ||
2107 | struct backref_node *node; | ||
2108 | int level; | ||
2109 | int ret; | ||
2110 | int err = 0; | ||
2111 | |||
2112 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
2113 | while (!list_empty(&cache->pending[level])) { | ||
2114 | node = list_entry(cache->pending[level].next, | ||
2115 | struct backref_node, lower); | ||
2116 | BUG_ON(node->level != level); | ||
2117 | |||
2118 | ret = link_to_upper(trans, node, path); | ||
2119 | if (ret < 0) | ||
2120 | err = ret; | ||
2121 | /* | ||
2122 | * this removes the node from the pending list and | ||
2123 | * may add some other nodes to the level + 1 | ||
2124 | * pending list | ||
2125 | */ | ||
2126 | remove_backref_node(cache, node); | ||
2127 | } | ||
2128 | } | ||
2129 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
2130 | return err; | ||
2131 | } | ||
2132 | |||
2133 | static void mark_block_processed(struct reloc_control *rc, | ||
2134 | struct backref_node *node) | ||
2135 | { | ||
2136 | u32 blocksize; | ||
2137 | if (node->level == 0 || | ||
2138 | in_block_group(node->bytenr, rc->block_group)) { | ||
2139 | blocksize = btrfs_level_size(rc->extent_root, node->level); | ||
2140 | set_extent_bits(&rc->processed_blocks, node->bytenr, | ||
2141 | node->bytenr + blocksize - 1, EXTENT_DIRTY, | ||
2142 | GFP_NOFS); | ||
2143 | } | ||
2144 | node->processed = 1; | ||
2145 | } | ||
2146 | |||
2147 | /* | ||
2148 | * mark a block and all blocks that directly/indirectly reference it | ||
2149 | * as processed. | ||
2150 | */ | ||
2151 | static void update_processed_blocks(struct reloc_control *rc, | ||
2152 | struct backref_node *node) | ||
2153 | { | ||
2154 | struct backref_node *next = node; | ||
2155 | struct backref_edge *edge; | ||
2156 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
2157 | int index = 0; | ||
2158 | |||
2159 | while (next) { | ||
2160 | cond_resched(); | ||
2161 | while (1) { | ||
2162 | if (next->processed) | ||
2163 | break; | ||
2164 | |||
2165 | mark_block_processed(rc, next); | ||
2166 | |||
2167 | if (list_empty(&next->upper)) | ||
2168 | break; | ||
2169 | |||
2170 | edge = list_entry(next->upper.next, | ||
2171 | struct backref_edge, list[LOWER]); | ||
2172 | edges[index++] = edge; | ||
2173 | next = edge->node[UPPER]; | ||
2174 | } | ||
2175 | next = walk_down_backref(edges, &index); | ||
2176 | } | ||
2177 | } | ||
2178 | |||
2179 | static int tree_block_processed(u64 bytenr, u32 blocksize, | ||
2180 | struct reloc_control *rc) | ||
2181 | { | ||
2182 | if (test_range_bit(&rc->processed_blocks, bytenr, | ||
2183 | bytenr + blocksize - 1, EXTENT_DIRTY, 1)) | ||
2184 | return 1; | ||
2185 | return 0; | ||
2186 | } | ||
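
Processed blocks are remembered as EXTENT_DIRTY ranges in rc->processed_blocks, so the test above is just a range-bit lookup. A hypothetical sketch with a plain bitmap over block indices, assuming a fixed blocksize and a bounded address space (the kernel's extent_io tree handles sparse 64-bit ranges instead):

    #include <limits.h>

    #define NBLOCKS 4096
    #define BITS_PER_WORD (sizeof(unsigned long) * CHAR_BIT)

    static unsigned long processed_map[NBLOCKS / BITS_PER_WORD];

    static void mark_processed(unsigned long long bytenr, unsigned blocksize)
    {
            unsigned long i = bytenr / blocksize;
            processed_map[i / BITS_PER_WORD] |= 1UL << (i % BITS_PER_WORD);
    }

    static int block_processed(unsigned long long bytenr, unsigned blocksize)
    {
            unsigned long i = bytenr / blocksize;
            return !!(processed_map[i / BITS_PER_WORD] &
                      (1UL << (i % BITS_PER_WORD)));
    }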
2187 | |||
2188 | /* | ||
2189 | * check if there are any file extent pointers in the leaf that point | ||
2190 | * to data that requires processing | ||
2191 | */ | ||
2192 | static int check_file_extents(struct reloc_control *rc, | ||
2193 | u64 bytenr, u32 blocksize, u64 ptr_gen) | ||
2194 | { | ||
2195 | struct btrfs_key found_key; | ||
2196 | struct btrfs_file_extent_item *fi; | ||
2197 | struct extent_buffer *leaf; | ||
2198 | u32 nritems; | ||
2199 | int i; | ||
2200 | int ret = 0; | ||
2201 | |||
2202 | leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); | ||
2203 | |||
2204 | nritems = btrfs_header_nritems(leaf); | ||
2205 | for (i = 0; i < nritems; i++) { | ||
2206 | cond_resched(); | ||
2207 | btrfs_item_key_to_cpu(leaf, &found_key, i); | ||
2208 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
2209 | continue; | ||
2210 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
2211 | if (btrfs_file_extent_type(leaf, fi) == | ||
2212 | BTRFS_FILE_EXTENT_INLINE) | ||
2213 | continue; | ||
2214 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
2215 | if (bytenr == 0) | ||
2216 | continue; | ||
2217 | if (in_block_group(bytenr, rc->block_group)) { | ||
2218 | ret = 1; | ||
2219 | break; | ||
2220 | } | ||
2221 | } | ||
2222 | free_extent_buffer(leaf); | ||
2223 | return ret; | ||
2224 | } | ||
2225 | |||
2226 | /* | ||
2227 | * scan child blocks of a given block to find blocks that require processing | ||
2228 | */ | ||
2229 | static int add_child_blocks(struct btrfs_trans_handle *trans, | ||
2230 | struct reloc_control *rc, | ||
2231 | struct backref_node *node, | ||
2232 | struct rb_root *blocks) | ||
2233 | { | ||
2234 | struct tree_block *block; | ||
2235 | struct rb_node *rb_node; | ||
2236 | u64 bytenr; | ||
2237 | u64 ptr_gen; | ||
2238 | u32 blocksize; | ||
2239 | u32 nritems; | ||
2240 | int i; | ||
2241 | int err = 0; | ||
2242 | |||
2243 | nritems = btrfs_header_nritems(node->eb); | ||
2244 | blocksize = btrfs_level_size(rc->extent_root, node->level - 1); | ||
2245 | for (i = 0; i < nritems; i++) { | ||
2246 | cond_resched(); | ||
2247 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2248 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2249 | if (ptr_gen == trans->transid) | ||
2250 | continue; | ||
2251 | if (!in_block_group(bytenr, rc->block_group) && | ||
2252 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2253 | continue; | ||
2254 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2255 | continue; | ||
2256 | |||
2257 | readahead_tree_block(rc->extent_root, | ||
2258 | bytenr, blocksize, ptr_gen); | ||
2259 | } | ||
2260 | |||
2261 | for (i = 0; i < nritems; i++) { | ||
2262 | cond_resched(); | ||
2263 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2264 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2265 | if (ptr_gen == trans->transid) | ||
2266 | continue; | ||
2267 | if (!in_block_group(bytenr, rc->block_group) && | ||
2268 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2269 | continue; | ||
2270 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2271 | continue; | ||
2272 | if (!in_block_group(bytenr, rc->block_group) && | ||
2273 | !check_file_extents(rc, bytenr, blocksize, ptr_gen)) | ||
2274 | continue; | ||
2275 | |||
2276 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
2277 | if (!block) { | ||
2278 | err = -ENOMEM; | ||
2279 | break; | ||
2280 | } | ||
2281 | block->bytenr = bytenr; | ||
2282 | btrfs_node_key_to_cpu(node->eb, &block->key, i); | ||
2283 | block->level = node->level - 1; | ||
2284 | block->key_ready = 1; | ||
2285 | rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); | ||
2286 | BUG_ON(rb_node); | ||
2287 | } | ||
2288 | if (err) | ||
2289 | free_block_list(blocks); | ||
2290 | return err; | ||
2291 | } | ||
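
add_child_blocks() makes two passes over the node's children: the first issues readahead_tree_block() for every candidate, the second does the filtering and queueing, so the metadata reads overlap with the CPU work. A minimal sketch of the pattern (the callbacks are hypothetical):

    /* pass 1 starts all the I/O, pass 2 consumes the now-warm blocks */
    static void process_children(const unsigned long long *bytenr, int n,
                                 void (*prefetch)(unsigned long long),
                                 void (*process)(unsigned long long))
    {
            int i;

            for (i = 0; i < n; i++)
                    prefetch(bytenr[i]);
            for (i = 0; i < n; i++)
                    process(bytenr[i]);
    }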
2292 | |||
2293 | /* | ||
2294 | * find adjacent blocks that require processing | ||
2295 | */ | ||
2296 | static noinline_for_stack | ||
2297 | int add_adjacent_blocks(struct btrfs_trans_handle *trans, | ||
2298 | struct reloc_control *rc, | ||
2299 | struct backref_cache *cache, | ||
2300 | struct rb_root *blocks, int level, | ||
2301 | struct backref_node **upper) | ||
2302 | { | ||
2303 | struct backref_node *node; | ||
2304 | int ret = 0; | ||
2305 | |||
2306 | WARN_ON(!list_empty(&cache->pending[level])); | ||
2307 | |||
2308 | if (list_empty(&cache->pending[level + 1])) | ||
2309 | return 1; | ||
2310 | |||
2311 | node = list_entry(cache->pending[level + 1].next, | ||
2312 | struct backref_node, lower); | ||
2313 | if (node->eb) | ||
2314 | ret = add_child_blocks(trans, rc, node, blocks); | ||
2315 | |||
2316 | *upper = node; | ||
2317 | return ret; | ||
2318 | } | ||
2319 | |||
2320 | static int get_tree_block_key(struct reloc_control *rc, | ||
2321 | struct tree_block *block) | ||
2322 | { | ||
2323 | struct extent_buffer *eb; | ||
2324 | |||
2325 | BUG_ON(block->key_ready); | ||
2326 | eb = read_tree_block(rc->extent_root, block->bytenr, | ||
2327 | block->key.objectid, block->key.offset); | ||
2328 | WARN_ON(btrfs_header_level(eb) != block->level); | ||
2329 | if (block->level == 0) | ||
2330 | btrfs_item_key_to_cpu(eb, &block->key, 0); | ||
2331 | else | ||
2332 | btrfs_node_key_to_cpu(eb, &block->key, 0); | ||
2333 | free_extent_buffer(eb); | ||
2334 | block->key_ready = 1; | ||
2335 | return 0; | ||
2336 | } | ||
2337 | |||
2338 | static int reada_tree_block(struct reloc_control *rc, | ||
2339 | struct tree_block *block) | ||
2340 | { | ||
2341 | BUG_ON(block->key_ready); | ||
2342 | readahead_tree_block(rc->extent_root, block->bytenr, | ||
2343 | block->key.objectid, block->key.offset); | ||
2344 | return 0; | ||
2345 | } | ||
2346 | |||
2347 | /* | ||
2348 | * helper function to relocate a tree block | ||
2349 | */ | ||
2350 | static int relocate_tree_block(struct btrfs_trans_handle *trans, | ||
2351 | struct reloc_control *rc, | ||
2352 | struct backref_node *node, | ||
2353 | struct btrfs_key *key, | ||
2354 | struct btrfs_path *path) | ||
2355 | { | ||
2356 | struct btrfs_root *root; | ||
2357 | int ret; | ||
2358 | |||
2359 | root = select_one_root(trans, node); | ||
2360 | if (unlikely(!root)) { | ||
2361 | rc->found_old_snapshot = 1; | ||
2362 | update_processed_blocks(rc, node); | ||
2363 | return 0; | ||
2364 | } | ||
2365 | |||
2366 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
2367 | ret = do_relocation(trans, node, key, path, 1); | ||
2368 | if (ret < 0) | ||
2369 | goto out; | ||
2370 | if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { | ||
2371 | ret = replace_file_extents(trans, rc, root, | ||
2372 | node->eb, NULL); | ||
2373 | if (ret < 0) | ||
2374 | goto out; | ||
2375 | } | ||
2376 | drop_node_buffer(node); | ||
2377 | } else if (!root->ref_cows) { | ||
2378 | path->lowest_level = node->level; | ||
2379 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2380 | btrfs_release_path(root, path); | ||
2381 | if (ret < 0) | ||
2382 | goto out; | ||
2383 | } else if (root != node->root) { | ||
2384 | WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); | ||
2385 | } | ||
2386 | |||
2387 | update_processed_blocks(rc, node); | ||
2388 | ret = 0; | ||
2389 | out: | ||
2390 | drop_node_buffer(node); | ||
2391 | return ret; | ||
2392 | } | ||
2393 | |||
2394 | /* | ||
2395 | * relocate a list of blocks | ||
2396 | */ | ||
2397 | static noinline_for_stack | ||
2398 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, | ||
2399 | struct reloc_control *rc, struct rb_root *blocks) | ||
2400 | { | ||
2401 | struct backref_cache *cache; | ||
2402 | struct backref_node *node; | ||
2403 | struct btrfs_path *path; | ||
2404 | struct tree_block *block; | ||
2405 | struct rb_node *rb_node; | ||
2406 | int level = -1; | ||
2407 | int ret; | ||
2408 | int err = 0; | ||
2409 | |||
2410 | path = btrfs_alloc_path(); | ||
2411 | if (!path) | ||
2412 | return -ENOMEM; | ||
2413 | |||
2414 | cache = kmalloc(sizeof(*cache), GFP_NOFS); | ||
2415 | if (!cache) { | ||
2416 | btrfs_free_path(path); | ||
2417 | return -ENOMEM; | ||
2418 | } | ||
2419 | |||
2420 | backref_cache_init(cache); | ||
2421 | |||
2422 | rb_node = rb_first(blocks); | ||
2423 | while (rb_node) { | ||
2424 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
2425 | if (level == -1) | ||
2426 | level = block->level; | ||
2427 | else | ||
2428 | BUG_ON(level != block->level); | ||
2429 | if (!block->key_ready) | ||
2430 | reada_tree_block(rc, block); | ||
2431 | rb_node = rb_next(rb_node); | ||
2432 | } | ||
2433 | |||
2434 | rb_node = rb_first(blocks); | ||
2435 | while (rb_node) { | ||
2436 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
2437 | if (!block->key_ready) | ||
2438 | get_tree_block_key(rc, block); | ||
2439 | rb_node = rb_next(rb_node); | ||
2440 | } | ||
2441 | |||
2442 | rb_node = rb_first(blocks); | ||
2443 | while (rb_node) { | ||
2444 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
2445 | |||
2446 | node = build_backref_tree(rc, cache, &block->key, | ||
2447 | block->level, block->bytenr); | ||
2448 | if (IS_ERR(node)) { | ||
2449 | err = PTR_ERR(node); | ||
2450 | goto out; | ||
2451 | } | ||
2452 | |||
2453 | ret = relocate_tree_block(trans, rc, node, &block->key, | ||
2454 | path); | ||
2455 | if (ret < 0) { | ||
2456 | err = ret; | ||
2457 | goto out; | ||
2458 | } | ||
2459 | remove_backref_node(cache, node); | ||
2460 | rb_node = rb_next(rb_node); | ||
2461 | } | ||
2462 | |||
2463 | if (level > 0) | ||
2464 | goto out; | ||
2465 | |||
2466 | free_block_list(blocks); | ||
2467 | |||
2468 | /* | ||
2469 | * now backrefs of some upper level tree blocks have been cached, | ||
2470 | * try relocating blocks referenced by these upper level blocks. | ||
2471 | */ | ||
2472 | while (1) { | ||
2473 | struct backref_node *upper = NULL; | ||
2474 | if (trans->transaction->in_commit || | ||
2475 | trans->transaction->delayed_refs.flushing) | ||
2476 | break; | ||
2477 | |||
2478 | ret = add_adjacent_blocks(trans, rc, cache, blocks, level, | ||
2479 | &upper); | ||
2480 | if (ret < 0) | ||
2481 | err = ret; | ||
2482 | if (ret != 0) | ||
2483 | break; | ||
2484 | |||
2485 | rb_node = rb_first(blocks); | ||
2486 | while (rb_node) { | ||
2487 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
2488 | if (trans->transaction->in_commit || | ||
2489 | trans->transaction->delayed_refs.flushing) | ||
2490 | goto out; | ||
2491 | BUG_ON(!block->key_ready); | ||
2492 | node = build_backref_tree(rc, cache, &block->key, | ||
2493 | level, block->bytenr); | ||
2494 | if (IS_ERR(node)) { | ||
2495 | err = PTR_ERR(node); | ||
2496 | goto out; | ||
2497 | } | ||
2498 | |||
2499 | ret = relocate_tree_block(trans, rc, node, | ||
2500 | &block->key, path); | ||
2501 | if (ret < 0) { | ||
2502 | err = ret; | ||
2503 | goto out; | ||
2504 | } | ||
2505 | remove_backref_node(cache, node); | ||
2506 | rb_node = rb_next(rb_node); | ||
2507 | } | ||
2508 | free_block_list(blocks); | ||
2509 | |||
2510 | if (upper) { | ||
2511 | ret = link_to_upper(trans, upper, path); | ||
2512 | if (ret < 0) { | ||
2513 | err = ret; | ||
2514 | break; | ||
2515 | } | ||
2516 | remove_backref_node(cache, upper); | ||
2517 | } | ||
2518 | } | ||
2519 | out: | ||
2520 | free_block_list(blocks); | ||
2521 | |||
2522 | ret = finish_pending_nodes(trans, cache, path); | ||
2523 | if (ret < 0) | ||
2524 | err = ret; | ||
2525 | |||
2526 | kfree(cache); | ||
2527 | btrfs_free_path(path); | ||
2528 | return err; | ||
2529 | } | ||
2530 | |||
2531 | static noinline_for_stack | ||
2532 | int relocate_inode_pages(struct inode *inode, u64 start, u64 len) | ||
2533 | { | ||
2534 | u64 page_start; | ||
2535 | u64 page_end; | ||
2536 | unsigned long i; | ||
2537 | unsigned long first_index; | ||
2538 | unsigned long last_index; | ||
2539 | unsigned int total_read = 0; | ||
2540 | unsigned int total_dirty = 0; | ||
2541 | struct page *page; | ||
2542 | struct file_ra_state *ra; | ||
2543 | struct btrfs_ordered_extent *ordered; | ||
2544 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
2545 | int ret = 0; | ||
2546 | |||
2547 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
2548 | if (!ra) | ||
2549 | return -ENOMEM; | ||
2550 | |||
2551 | mutex_lock(&inode->i_mutex); | ||
2552 | first_index = start >> PAGE_CACHE_SHIFT; | ||
2553 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
2554 | |||
2555 | /* make sure the dirty trick played by the caller works */ | ||
2556 | ret = invalidate_inode_pages2_range(inode->i_mapping, | ||
2557 | first_index, last_index); | ||
2558 | if (ret) | ||
2559 | goto out_unlock; | ||
2560 | |||
2561 | file_ra_state_init(ra, inode->i_mapping); | ||
2562 | |||
2563 | for (i = first_index ; i <= last_index; i++) { | ||
2564 | if (total_read % ra->ra_pages == 0) { | ||
2565 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | ||
2566 | min(last_index, ra->ra_pages + i - 1)); | ||
2567 | } | ||
2568 | total_read++; | ||
2569 | again: | ||
2570 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
2571 | BUG_ON(1); | ||
2572 | page = grab_cache_page(inode->i_mapping, i); | ||
2573 | if (!page) { | ||
2574 | ret = -ENOMEM; | ||
2575 | goto out_unlock; | ||
2576 | } | ||
2577 | if (!PageUptodate(page)) { | ||
2578 | btrfs_readpage(NULL, page); | ||
2579 | lock_page(page); | ||
2580 | if (!PageUptodate(page)) { | ||
2581 | unlock_page(page); | ||
2582 | page_cache_release(page); | ||
2583 | ret = -EIO; | ||
2584 | goto out_unlock; | ||
2585 | } | ||
2586 | } | ||
2587 | wait_on_page_writeback(page); | ||
2588 | |||
2589 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
2590 | page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
2591 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
2592 | |||
2593 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
2594 | if (ordered) { | ||
2595 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
2596 | unlock_page(page); | ||
2597 | page_cache_release(page); | ||
2598 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
2599 | btrfs_put_ordered_extent(ordered); | ||
2600 | goto again; | ||
2601 | } | ||
2602 | set_page_extent_mapped(page); | ||
2603 | |||
2604 | if (i == first_index) | ||
2605 | set_extent_bits(io_tree, page_start, page_end, | ||
2606 | EXTENT_BOUNDARY, GFP_NOFS); | ||
2607 | btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
2608 | |||
2609 | set_page_dirty(page); | ||
2610 | total_dirty++; | ||
2611 | |||
2612 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
2613 | unlock_page(page); | ||
2614 | page_cache_release(page); | ||
2615 | } | ||
2616 | out_unlock: | ||
2617 | mutex_unlock(&inode->i_mutex); | ||
2618 | kfree(ra); | ||
2619 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
2620 | return ret; | ||
2621 | } | ||
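The index arithmetic in relocate_inode_pages() maps a byte range onto inclusive page-cache indices by shifting by PAGE_CACHE_SHIFT. A minimal user-space sketch of the same computation, assuming 4KiB pages and made-up extent values (not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_CACHE_SHIFT 12                     /* assume 4KiB pages */

    int main(void)
    {
            uint64_t start = 5000, len = 10000;     /* hypothetical extent */
            unsigned long first_index = start >> PAGE_CACHE_SHIFT;
            unsigned long last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;

            /* bytes [5000, 14999] touch pages 1..3 (bytes 4096..16383) */
            printf("pages %lu..%lu\n", first_index, last_index);
            return 0;
    }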
2622 | |||
2623 | static noinline_for_stack | ||
2624 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) | ||
2625 | { | ||
2626 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2627 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
2628 | struct extent_map *em; | ||
2629 | u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; | ||
2630 | u64 end = start + extent_key->offset - 1; | ||
2631 | |||
2632 | em = alloc_extent_map(GFP_NOFS); | ||
2633 | em->start = start; | ||
2634 | em->len = extent_key->offset; | ||
2635 | em->block_len = extent_key->offset; | ||
2636 | em->block_start = extent_key->objectid; | ||
2637 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
2638 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
2639 | |||
2640 | /* set up an extent map to cheat btrfs_readpage */ | ||
2641 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2642 | while (1) { | ||
2643 | int ret; | ||
2644 | spin_lock(&em_tree->lock); | ||
2645 | ret = add_extent_mapping(em_tree, em); | ||
2646 | spin_unlock(&em_tree->lock); | ||
2647 | if (ret != -EEXIST) { | ||
2648 | free_extent_map(em); | ||
2649 | break; | ||
2650 | } | ||
2651 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
2652 | } | ||
2653 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2654 | |||
2655 | return relocate_inode_pages(inode, start, extent_key->offset); | ||
2656 | } | ||
2657 | |||
2658 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
2659 | static int get_ref_objectid_v0(struct reloc_control *rc, | ||
2660 | struct btrfs_path *path, | ||
2661 | struct btrfs_key *extent_key, | ||
2662 | u64 *ref_objectid, int *path_change) | ||
2663 | { | ||
2664 | struct btrfs_key key; | ||
2665 | struct extent_buffer *leaf; | ||
2666 | struct btrfs_extent_ref_v0 *ref0; | ||
2667 | int ret; | ||
2668 | int slot; | ||
2669 | |||
2670 | leaf = path->nodes[0]; | ||
2671 | slot = path->slots[0]; | ||
2672 | while (1) { | ||
2673 | if (slot >= btrfs_header_nritems(leaf)) { | ||
2674 | ret = btrfs_next_leaf(rc->extent_root, path); | ||
2675 | if (ret < 0) | ||
2676 | return ret; | ||
2677 | BUG_ON(ret > 0); | ||
2678 | leaf = path->nodes[0]; | ||
2679 | slot = path->slots[0]; | ||
2680 | if (path_change) | ||
2681 | *path_change = 1; | ||
2682 | } | ||
2683 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
2684 | if (key.objectid != extent_key->objectid) | ||
2685 | return -ENOENT; | ||
2686 | |||
2687 | if (key.type != BTRFS_EXTENT_REF_V0_KEY) { | ||
2688 | slot++; | ||
2689 | continue; | ||
2690 | } | ||
2691 | ref0 = btrfs_item_ptr(leaf, slot, | ||
2692 | struct btrfs_extent_ref_v0); | ||
2693 | *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); | ||
2694 | break; | ||
2695 | } | ||
2696 | return 0; | ||
2697 | } | ||
2698 | #endif | ||
2699 | |||
2700 | /* | ||
2701 | * helper to add a tree block to the list. | ||
2702 | * the major work is getting the generation and level of the block | ||
2703 | */ | ||
2704 | static int add_tree_block(struct reloc_control *rc, | ||
2705 | struct btrfs_key *extent_key, | ||
2706 | struct btrfs_path *path, | ||
2707 | struct rb_root *blocks) | ||
2708 | { | ||
2709 | struct extent_buffer *eb; | ||
2710 | struct btrfs_extent_item *ei; | ||
2711 | struct btrfs_tree_block_info *bi; | ||
2712 | struct tree_block *block; | ||
2713 | struct rb_node *rb_node; | ||
2714 | u32 item_size; | ||
2715 | int level = -1; | ||
2716 | int generation; | ||
2717 | |||
2718 | eb = path->nodes[0]; | ||
2719 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | ||
2720 | |||
2721 | if (item_size >= sizeof(*ei) + sizeof(*bi)) { | ||
2722 | ei = btrfs_item_ptr(eb, path->slots[0], | ||
2723 | struct btrfs_extent_item); | ||
2724 | bi = (struct btrfs_tree_block_info *)(ei + 1); | ||
2725 | generation = btrfs_extent_generation(eb, ei); | ||
2726 | level = btrfs_tree_block_level(eb, bi); | ||
2727 | } else { | ||
2728 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
2729 | u64 ref_owner; | ||
2730 | int ret; | ||
2731 | |||
2732 | BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); | ||
2733 | ret = get_ref_objectid_v0(rc, path, extent_key, | ||
2734 | &ref_owner, NULL); | ||
2735 | BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); | ||
2736 | level = (int)ref_owner; | ||
2737 | /* FIXME: get real generation */ | ||
2738 | generation = 0; | ||
2739 | #else | ||
2740 | BUG(); | ||
2741 | #endif | ||
2742 | } | ||
2743 | |||
2744 | btrfs_release_path(rc->extent_root, path); | ||
2745 | |||
2746 | BUG_ON(level == -1); | ||
2747 | |||
2748 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
2749 | if (!block) | ||
2750 | return -ENOMEM; | ||
2751 | |||
2752 | block->bytenr = extent_key->objectid; | ||
2753 | block->key.objectid = extent_key->offset; | ||
2754 | block->key.offset = generation; | ||
2755 | block->level = level; | ||
2756 | block->key_ready = 0; | ||
2757 | |||
2758 | rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); | ||
2759 | BUG_ON(rb_node); | ||
2760 | |||
2761 | return 0; | ||
2762 | } | ||
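Note that add_tree_block() repurposes the btrfs_key embedded in struct tree_block as scratch space rather than as a real key: objectid temporarily holds the block size and offset the generation, with key_ready left clear so the real key can be filled in before relocation. A small stand-alone sketch of that packing, using invented numbers:

    #include <stdio.h>
    #include <stdint.h>

    struct btrfs_key {                      /* simplified layout */
            uint64_t objectid;
            uint8_t type;
            uint64_t offset;
    };

    struct tree_block {                     /* fields as queued above */
            uint64_t bytenr;
            struct btrfs_key key;
            unsigned int level;
            unsigned int key_ready:1;
    };

    int main(void)
    {
            /* hypothetical EXTENT_ITEM (bytenr 12345344, size 4096),
             * generation taken from the tree block info */
            struct tree_block b = {
                    .bytenr = 12345344,
                    .key = { .objectid = 4096, .offset = 77 },
                    .level = 1,
                    .key_ready = 0,
            };

            printf("block %llu: size %llu gen %llu level %u\n",
                   (unsigned long long)b.bytenr,
                   (unsigned long long)b.key.objectid,
                   (unsigned long long)b.key.offset, b.level);
            return 0;
    }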
2763 | |||
2764 | /* | ||
2765 | * helper to add tree blocks for a backref of type BTRFS_SHARED_DATA_REF_KEY | ||
2766 | */ | ||
2767 | static int __add_tree_block(struct reloc_control *rc, | ||
2768 | u64 bytenr, u32 blocksize, | ||
2769 | struct rb_root *blocks) | ||
2770 | { | ||
2771 | struct btrfs_path *path; | ||
2772 | struct btrfs_key key; | ||
2773 | int ret; | ||
2774 | |||
2775 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2776 | return 0; | ||
2777 | |||
2778 | if (tree_search(blocks, bytenr)) | ||
2779 | return 0; | ||
2780 | |||
2781 | path = btrfs_alloc_path(); | ||
2782 | if (!path) | ||
2783 | return -ENOMEM; | ||
2784 | |||
2785 | key.objectid = bytenr; | ||
2786 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
2787 | key.offset = blocksize; | ||
2788 | |||
2789 | path->search_commit_root = 1; | ||
2790 | path->skip_locking = 1; | ||
2791 | ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); | ||
2792 | if (ret < 0) | ||
2793 | goto out; | ||
2794 | BUG_ON(ret); | ||
2795 | |||
2796 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
2797 | ret = add_tree_block(rc, &key, path, blocks); | ||
2798 | out: | ||
2799 | btrfs_free_path(path); | ||
2800 | return ret; | ||
2801 | } | ||
2802 | |||
2803 | /* | ||
2804 | * helper to check if the block uses full backrefs for the pointers in it | ||
2805 | */ | ||
2806 | static int block_use_full_backref(struct reloc_control *rc, | ||
2807 | struct extent_buffer *eb) | ||
2808 | { | ||
2809 | struct btrfs_path *path; | ||
2810 | struct btrfs_extent_item *ei; | ||
2811 | struct btrfs_key key; | ||
2812 | u64 flags; | ||
2813 | int ret; | ||
2814 | |||
2815 | if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) || | ||
2816 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) | ||
2817 | return 1; | ||
2818 | |||
2819 | path = btrfs_alloc_path(); | ||
2820 | BUG_ON(!path); | ||
2821 | |||
2822 | key.objectid = eb->start; | ||
2823 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
2824 | key.offset = eb->len; | ||
2825 | |||
2826 | path->search_commit_root = 1; | ||
2827 | path->skip_locking = 1; | ||
2828 | ret = btrfs_search_slot(NULL, rc->extent_root, | ||
2829 | &key, path, 0, 0); | ||
2830 | BUG_ON(ret); | ||
2831 | |||
2832 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2833 | struct btrfs_extent_item); | ||
2834 | flags = btrfs_extent_flags(path->nodes[0], ei); | ||
2835 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
2836 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) | ||
2837 | ret = 1; | ||
2838 | else | ||
2839 | ret = 0; | ||
2840 | btrfs_free_path(path); | ||
2841 | return ret; | ||
2842 | } | ||
2843 | |||
2844 | /* | ||
2845 | * helper to add tree blocks for a backref of type BTRFS_EXTENT_DATA_REF_KEY. | ||
2846 | * this function scans the fs tree to find blocks that reference the data extent | ||
2847 | */ | ||
2848 | static int find_data_references(struct reloc_control *rc, | ||
2849 | struct btrfs_key *extent_key, | ||
2850 | struct extent_buffer *leaf, | ||
2851 | struct btrfs_extent_data_ref *ref, | ||
2852 | struct rb_root *blocks) | ||
2853 | { | ||
2854 | struct btrfs_path *path; | ||
2855 | struct tree_block *block; | ||
2856 | struct btrfs_root *root; | ||
2857 | struct btrfs_file_extent_item *fi; | ||
2858 | struct rb_node *rb_node; | ||
2859 | struct btrfs_key key; | ||
2860 | u64 ref_root; | ||
2861 | u64 ref_objectid; | ||
2862 | u64 ref_offset; | ||
2863 | u32 ref_count; | ||
2864 | u32 nritems; | ||
2865 | int err = 0; | ||
2866 | int added = 0; | ||
2867 | int counted; | ||
2868 | int ret; | ||
2869 | |||
2870 | path = btrfs_alloc_path(); | ||
2871 | if (!path) | ||
2872 | return -ENOMEM; | ||
2873 | |||
2874 | ref_root = btrfs_extent_data_ref_root(leaf, ref); | ||
2875 | ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); | ||
2876 | ref_offset = btrfs_extent_data_ref_offset(leaf, ref); | ||
2877 | ref_count = btrfs_extent_data_ref_count(leaf, ref); | ||
2878 | |||
2879 | root = read_fs_root(rc->extent_root->fs_info, ref_root); | ||
2880 | if (IS_ERR(root)) { | ||
2881 | err = PTR_ERR(root); | ||
2882 | goto out; | ||
2883 | } | ||
2884 | |||
2885 | key.objectid = ref_objectid; | ||
2886 | key.offset = ref_offset; | ||
2887 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
2888 | |||
2889 | path->search_commit_root = 1; | ||
2890 | path->skip_locking = 1; | ||
2891 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
2892 | if (ret < 0) { | ||
2893 | err = ret; | ||
2894 | goto out; | ||
2895 | } | ||
2896 | |||
2897 | leaf = path->nodes[0]; | ||
2898 | nritems = btrfs_header_nritems(leaf); | ||
2899 | /* | ||
2900 | * the references in tree blocks that use full backrefs | ||
2901 | * are not counted in the extent data ref count | ||
2902 | */ | ||
2903 | if (block_use_full_backref(rc, leaf)) | ||
2904 | counted = 0; | ||
2905 | else | ||
2906 | counted = 1; | ||
2907 | rb_node = tree_search(blocks, leaf->start); | ||
2908 | if (rb_node) { | ||
2909 | if (counted) | ||
2910 | added = 1; | ||
2911 | else | ||
2912 | path->slots[0] = nritems; | ||
2913 | } | ||
2914 | |||
2915 | while (ref_count > 0) { | ||
2916 | while (path->slots[0] >= nritems) { | ||
2917 | ret = btrfs_next_leaf(root, path); | ||
2918 | if (ret < 0) { | ||
2919 | err = ret; | ||
2920 | goto out; | ||
2921 | } | ||
2922 | if (ret > 0) { | ||
2923 | WARN_ON(1); | ||
2924 | goto out; | ||
2925 | } | ||
2926 | |||
2927 | leaf = path->nodes[0]; | ||
2928 | nritems = btrfs_header_nritems(leaf); | ||
2929 | added = 0; | ||
2930 | |||
2931 | if (block_use_full_backref(rc, leaf)) | ||
2932 | counted = 0; | ||
2933 | else | ||
2934 | counted = 1; | ||
2935 | rb_node = tree_search(blocks, leaf->start); | ||
2936 | if (rb_node) { | ||
2937 | if (counted) | ||
2938 | added = 1; | ||
2939 | else | ||
2940 | path->slots[0] = nritems; | ||
2941 | } | ||
2942 | } | ||
2943 | |||
2944 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
2945 | if (key.objectid != ref_objectid || | ||
2946 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
2947 | WARN_ON(1); | ||
2948 | break; | ||
2949 | } | ||
2950 | |||
2951 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
2952 | struct btrfs_file_extent_item); | ||
2953 | |||
2954 | if (btrfs_file_extent_type(leaf, fi) == | ||
2955 | BTRFS_FILE_EXTENT_INLINE) | ||
2956 | goto next; | ||
2957 | |||
2958 | if (btrfs_file_extent_disk_bytenr(leaf, fi) != | ||
2959 | extent_key->objectid) | ||
2960 | goto next; | ||
2961 | |||
2962 | key.offset -= btrfs_file_extent_offset(leaf, fi); | ||
2963 | if (key.offset != ref_offset) | ||
2964 | goto next; | ||
2965 | |||
2966 | if (counted) | ||
2967 | ref_count--; | ||
2968 | if (added) | ||
2969 | goto next; | ||
2970 | |||
2971 | if (!tree_block_processed(leaf->start, leaf->len, rc)) { | ||
2972 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
2973 | if (!block) { | ||
2974 | err = -ENOMEM; | ||
2975 | break; | ||
2976 | } | ||
2977 | block->bytenr = leaf->start; | ||
2978 | btrfs_item_key_to_cpu(leaf, &block->key, 0); | ||
2979 | block->level = 0; | ||
2980 | block->key_ready = 1; | ||
2981 | rb_node = tree_insert(blocks, block->bytenr, | ||
2982 | &block->rb_node); | ||
2983 | BUG_ON(rb_node); | ||
2984 | } | ||
2985 | if (counted) | ||
2986 | added = 1; | ||
2987 | else | ||
2988 | path->slots[0] = nritems; | ||
2989 | next: | ||
2990 | path->slots[0]++; | ||
2991 | |||
2992 | } | ||
2993 | out: | ||
2994 | btrfs_free_path(path); | ||
2995 | return err; | ||
2996 | } | ||
2997 | |||
2998 | /* | ||
2999 | * helper to find all tree blocks that reference a given data extent | ||
3000 | */ | ||
3001 | static noinline_for_stack | ||
3002 | int add_data_references(struct reloc_control *rc, | ||
3003 | struct btrfs_key *extent_key, | ||
3004 | struct btrfs_path *path, | ||
3005 | struct rb_root *blocks) | ||
3006 | { | ||
3007 | struct btrfs_key key; | ||
3008 | struct extent_buffer *eb; | ||
3009 | struct btrfs_extent_data_ref *dref; | ||
3010 | struct btrfs_extent_inline_ref *iref; | ||
3011 | unsigned long ptr; | ||
3012 | unsigned long end; | ||
3013 | u32 blocksize; | ||
3014 | int ret; | ||
3015 | int err = 0; | ||
3016 | |||
3017 | ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, | ||
3018 | extent_key->offset); | ||
3019 | BUG_ON(ret < 0); | ||
3020 | if (ret > 0) { | ||
3021 | /* the relocated data is fragmented */ | ||
3022 | rc->extents_skipped++; | ||
3023 | btrfs_release_path(rc->extent_root, path); | ||
3024 | return 0; | ||
3025 | } | ||
3026 | |||
3027 | blocksize = btrfs_level_size(rc->extent_root, 0); | ||
3028 | |||
3029 | eb = path->nodes[0]; | ||
3030 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); | ||
3031 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); | ||
3032 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
3033 | if (ptr + sizeof(struct btrfs_extent_item_v0) == end) | ||
3034 | ptr = end; | ||
3035 | else | ||
3036 | #endif | ||
3037 | ptr += sizeof(struct btrfs_extent_item); | ||
3038 | |||
3039 | while (ptr < end) { | ||
3040 | iref = (struct btrfs_extent_inline_ref *)ptr; | ||
3041 | key.type = btrfs_extent_inline_ref_type(eb, iref); | ||
3042 | if (key.type == BTRFS_SHARED_DATA_REF_KEY) { | ||
3043 | key.offset = btrfs_extent_inline_ref_offset(eb, iref); | ||
3044 | ret = __add_tree_block(rc, key.offset, blocksize, | ||
3045 | blocks); | ||
3046 | } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
3047 | dref = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
3048 | ret = find_data_references(rc, extent_key, | ||
3049 | eb, dref, blocks); | ||
3050 | } else { | ||
3051 | BUG(); | ||
3052 | } | ||
3053 | ptr += btrfs_extent_inline_ref_size(key.type); | ||
3054 | } | ||
3055 | WARN_ON(ptr > end); | ||
3056 | |||
3057 | while (1) { | ||
3058 | cond_resched(); | ||
3059 | eb = path->nodes[0]; | ||
3060 | if (path->slots[0] >= btrfs_header_nritems(eb)) { | ||
3061 | ret = btrfs_next_leaf(rc->extent_root, path); | ||
3062 | if (ret < 0) { | ||
3063 | err = ret; | ||
3064 | break; | ||
3065 | } | ||
3066 | if (ret > 0) | ||
3067 | break; | ||
3068 | eb = path->nodes[0]; | ||
3069 | } | ||
3070 | |||
3071 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); | ||
3072 | if (key.objectid != extent_key->objectid) | ||
3073 | break; | ||
3074 | |||
3075 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
3076 | if (key.type == BTRFS_SHARED_DATA_REF_KEY || | ||
3077 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
3078 | #else | ||
3079 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | ||
3080 | if (key.type == BTRFS_SHARED_DATA_REF_KEY) { | ||
3081 | #endif | ||
3082 | ret = __add_tree_block(rc, key.offset, blocksize, | ||
3083 | blocks); | ||
3084 | } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
3085 | dref = btrfs_item_ptr(eb, path->slots[0], | ||
3086 | struct btrfs_extent_data_ref); | ||
3087 | ret = find_data_references(rc, extent_key, | ||
3088 | eb, dref, blocks); | ||
3089 | } else { | ||
3090 | ret = 0; | ||
3091 | } | ||
3092 | if (ret) { | ||
3093 | err = ret; | ||
3094 | break; | ||
3095 | } | ||
3096 | path->slots[0]++; | ||
3097 | } | ||
3098 | btrfs_release_path(rc->extent_root, path); | ||
3099 | if (err) | ||
3100 | free_block_list(blocks); | ||
3101 | return err; | ||
3102 | } | ||
3103 | |||
3104 | /* | ||
3105 | * helper to find the next unprocessed extent | ||
3106 | */ | ||
3107 | static noinline_for_stack | ||
3108 | int find_next_extent(struct btrfs_trans_handle *trans, | ||
3109 | struct reloc_control *rc, struct btrfs_path *path) | ||
3110 | { | ||
3111 | struct btrfs_key key; | ||
3112 | struct extent_buffer *leaf; | ||
3113 | u64 start, end, last; | ||
3114 | int ret; | ||
3115 | |||
3116 | last = rc->block_group->key.objectid + rc->block_group->key.offset; | ||
3117 | while (1) { | ||
3118 | cond_resched(); | ||
3119 | if (rc->search_start >= last) { | ||
3120 | ret = 1; | ||
3121 | break; | ||
3122 | } | ||
3123 | |||
3124 | key.objectid = rc->search_start; | ||
3125 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
3126 | key.offset = 0; | ||
3127 | |||
3128 | path->search_commit_root = 1; | ||
3129 | path->skip_locking = 1; | ||
3130 | ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, | ||
3131 | 0, 0); | ||
3132 | if (ret < 0) | ||
3133 | break; | ||
3134 | next: | ||
3135 | leaf = path->nodes[0]; | ||
3136 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||
3137 | ret = btrfs_next_leaf(rc->extent_root, path); | ||
3138 | if (ret != 0) | ||
3139 | break; | ||
3140 | leaf = path->nodes[0]; | ||
3141 | } | ||
3142 | |||
3143 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
3144 | if (key.objectid >= last) { | ||
3145 | ret = 1; | ||
3146 | break; | ||
3147 | } | ||
3148 | |||
3149 | if (key.type != BTRFS_EXTENT_ITEM_KEY || | ||
3150 | key.objectid + key.offset <= rc->search_start) { | ||
3151 | path->slots[0]++; | ||
3152 | goto next; | ||
3153 | } | ||
3154 | |||
3155 | ret = find_first_extent_bit(&rc->processed_blocks, | ||
3156 | key.objectid, &start, &end, | ||
3157 | EXTENT_DIRTY); | ||
3158 | |||
3159 | if (ret == 0 && start <= key.objectid) { | ||
3160 | btrfs_release_path(rc->extent_root, path); | ||
3161 | rc->search_start = end + 1; | ||
3162 | } else { | ||
3163 | rc->search_start = key.objectid + key.offset; | ||
3164 | return 0; | ||
3165 | } | ||
3166 | } | ||
3167 | btrfs_release_path(rc->extent_root, path); | ||
3168 | return ret; | ||
3169 | } | ||
3170 | |||
3171 | static void set_reloc_control(struct reloc_control *rc) | ||
3172 | { | ||
3173 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | ||
3174 | mutex_lock(&fs_info->trans_mutex); | ||
3175 | fs_info->reloc_ctl = rc; | ||
3176 | mutex_unlock(&fs_info->trans_mutex); | ||
3177 | } | ||
3178 | |||
3179 | static void unset_reloc_control(struct reloc_control *rc) | ||
3180 | { | ||
3181 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | ||
3182 | mutex_lock(&fs_info->trans_mutex); | ||
3183 | fs_info->reloc_ctl = NULL; | ||
3184 | mutex_unlock(&fs_info->trans_mutex); | ||
3185 | } | ||
3186 | |||
3187 | static int check_extent_flags(u64 flags) | ||
3188 | { | ||
3189 | if ((flags & BTRFS_EXTENT_FLAG_DATA) && | ||
3190 | (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) | ||
3191 | return 1; | ||
3192 | if (!(flags & BTRFS_EXTENT_FLAG_DATA) && | ||
3193 | !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) | ||
3194 | return 1; | ||
3195 | if ((flags & BTRFS_EXTENT_FLAG_DATA) && | ||
3196 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
3197 | return 1; | ||
3198 | return 0; | ||
3199 | } | ||
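check_extent_flags() returns nonzero for flag combinations the relocation code treats as corrupt: an extent must be exactly one of data or tree block, and the full-backref bit only makes sense on tree blocks. A stand-alone sketch of the same predicate; the bit values below follow the on-disk definitions (DATA bit 0, TREE_BLOCK bit 1, FULL_BACKREF bit 8) and should be double-checked against ctree.h:

    #include <assert.h>
    #include <stdint.h>

    #define FLAG_DATA               (1ULL << 0)
    #define FLAG_TREE_BLOCK         (1ULL << 1)
    #define FLAG_FULL_BACKREF       (1ULL << 8)     /* tree blocks only */

    /* nonzero means "bad combination", matching the kernel helper */
    static int bad_extent_flags(uint64_t flags)
    {
            if ((flags & FLAG_DATA) && (flags & FLAG_TREE_BLOCK))
                    return 1;       /* cannot be both */
            if (!(flags & FLAG_DATA) && !(flags & FLAG_TREE_BLOCK))
                    return 1;       /* must be one of the two */
            if ((flags & FLAG_DATA) && (flags & FLAG_FULL_BACKREF))
                    return 1;       /* full backref is tree-block only */
            return 0;
    }

    int main(void)
    {
            assert(!bad_extent_flags(FLAG_DATA));
            assert(!bad_extent_flags(FLAG_TREE_BLOCK | FLAG_FULL_BACKREF));
            assert(bad_extent_flags(FLAG_DATA | FLAG_FULL_BACKREF));
            assert(bad_extent_flags(0));
            return 0;
    }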
3200 | |||
3201 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | ||
3202 | { | ||
3203 | struct rb_root blocks = RB_ROOT; | ||
3204 | struct btrfs_key key; | ||
3205 | struct btrfs_trans_handle *trans = NULL; | ||
3206 | struct btrfs_path *path; | ||
3207 | struct btrfs_extent_item *ei; | ||
3208 | unsigned long nr; | ||
3209 | u64 flags; | ||
3210 | u32 item_size; | ||
3211 | int ret; | ||
3212 | int err = 0; | ||
3213 | |||
3214 | path = btrfs_alloc_path(); | ||
3215 | if (!path) | ||
3216 | return -ENOMEM; | ||
3217 | |||
3218 | rc->search_start = rc->block_group->key.objectid; | ||
3219 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
3220 | GFP_NOFS); | ||
3221 | |||
3222 | rc->create_reloc_root = 1; | ||
3223 | set_reloc_control(rc); | ||
3224 | |||
3225 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3226 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3227 | |||
3228 | while (1) { | ||
3229 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3230 | |||
3231 | ret = find_next_extent(trans, rc, path); | ||
3232 | if (ret < 0) | ||
3233 | err = ret; | ||
3234 | if (ret != 0) | ||
3235 | break; | ||
3236 | |||
3237 | rc->extents_found++; | ||
3238 | |||
3239 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
3240 | struct btrfs_extent_item); | ||
3241 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
3242 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
3243 | path->slots[0]); | ||
3244 | if (item_size >= sizeof(*ei)) { | ||
3245 | flags = btrfs_extent_flags(path->nodes[0], ei); | ||
3246 | ret = check_extent_flags(flags); | ||
3247 | BUG_ON(ret); | ||
3248 | |||
3249 | } else { | ||
3250 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
3251 | u64 ref_owner; | ||
3252 | int path_change = 0; | ||
3253 | |||
3254 | BUG_ON(item_size != | ||
3255 | sizeof(struct btrfs_extent_item_v0)); | ||
3256 | ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, | ||
3257 | &path_change); | ||
3258 | if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) | ||
3259 | flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; | ||
3260 | else | ||
3261 | flags = BTRFS_EXTENT_FLAG_DATA; | ||
3262 | |||
3263 | if (path_change) { | ||
3264 | btrfs_release_path(rc->extent_root, path); | ||
3265 | |||
3266 | path->search_commit_root = 1; | ||
3267 | path->skip_locking = 1; | ||
3268 | ret = btrfs_search_slot(NULL, rc->extent_root, | ||
3269 | &key, path, 0, 0); | ||
3270 | if (ret < 0) { | ||
3271 | err = ret; | ||
3272 | break; | ||
3273 | } | ||
3274 | BUG_ON(ret > 0); | ||
3275 | } | ||
3276 | #else | ||
3277 | BUG(); | ||
3278 | #endif | ||
3279 | } | ||
3280 | |||
3281 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
3282 | ret = add_tree_block(rc, &key, path, &blocks); | ||
3283 | } else if (rc->stage == UPDATE_DATA_PTRS && | ||
3284 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | ||
3285 | ret = add_data_references(rc, &key, path, &blocks); | ||
3286 | } else { | ||
3287 | btrfs_release_path(rc->extent_root, path); | ||
3288 | ret = 0; | ||
3289 | } | ||
3290 | if (ret < 0) { | ||
3291 | err = ret; | ||
3292 | break; | ||
3293 | } | ||
3294 | |||
3295 | if (!RB_EMPTY_ROOT(&blocks)) { | ||
3296 | ret = relocate_tree_blocks(trans, rc, &blocks); | ||
3297 | if (ret < 0) { | ||
3298 | err = ret; | ||
3299 | break; | ||
3300 | } | ||
3301 | } | ||
3302 | |||
3303 | nr = trans->blocks_used; | ||
3304 | btrfs_end_transaction_throttle(trans, rc->extent_root); | ||
3305 | trans = NULL; | ||
3306 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3307 | |||
3308 | if (rc->stage == MOVE_DATA_EXTENTS && | ||
3309 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | ||
3310 | rc->found_file_extent = 1; | ||
3311 | ret = relocate_data_extent(rc->data_inode, &key); | ||
3312 | if (ret < 0) { | ||
3313 | err = ret; | ||
3314 | break; | ||
3315 | } | ||
3316 | } | ||
3317 | } | ||
3318 | btrfs_free_path(path); | ||
3319 | |||
3320 | if (trans) { | ||
3321 | nr = trans->blocks_used; | ||
3322 | btrfs_end_transaction(trans, rc->extent_root); | ||
3323 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3324 | } | ||
3325 | |||
3326 | rc->create_reloc_root = 0; | ||
3327 | smp_mb(); | ||
3328 | |||
3329 | if (rc->extents_found > 0) { | ||
3330 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3331 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3332 | } | ||
3333 | |||
3334 | merge_reloc_roots(rc); | ||
3335 | |||
3336 | unset_reloc_control(rc); | ||
3337 | |||
3338 | /* get rid of pinned extents */ | ||
3339 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3340 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3341 | |||
3342 | return err; | ||
3343 | } | ||
3344 | |||
3345 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | ||
3346 | struct btrfs_root *root, | ||
3347 | u64 objectid, u64 size) | ||
3348 | { | ||
3349 | struct btrfs_path *path; | ||
3350 | struct btrfs_inode_item *item; | ||
3351 | struct extent_buffer *leaf; | ||
3352 | int ret; | ||
3353 | |||
3354 | path = btrfs_alloc_path(); | ||
3355 | if (!path) | ||
3356 | return -ENOMEM; | ||
3357 | |||
3358 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
3359 | if (ret) | ||
3360 | goto out; | ||
3361 | |||
3362 | leaf = path->nodes[0]; | ||
3363 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
3364 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
3365 | btrfs_set_inode_generation(leaf, item, 1); | ||
3366 | btrfs_set_inode_size(leaf, item, size); | ||
3367 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
3368 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | ||
3369 | btrfs_mark_buffer_dirty(leaf); | ||
3370 | btrfs_release_path(root, path); | ||
3371 | out: | ||
3372 | btrfs_free_path(path); | ||
3373 | return ret; | ||
3374 | } | ||
3375 | |||
3376 | /* | ||
3377 | * helper to create an inode for data relocation. | ||
3378 | * the inode is in the data relocation tree and its link count is 0 | ||
3379 | */ | ||
3380 | static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
3381 | struct btrfs_block_group_cache *group) | ||
3382 | { | ||
3383 | struct inode *inode = NULL; | ||
3384 | struct btrfs_trans_handle *trans; | ||
3385 | struct btrfs_root *root; | ||
3386 | struct btrfs_key key; | ||
3387 | unsigned long nr; | ||
3388 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | ||
3389 | int err = 0; | ||
3390 | |||
3391 | root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); | ||
3392 | if (IS_ERR(root)) | ||
3393 | return ERR_CAST(root); | ||
3394 | |||
3395 | trans = btrfs_start_transaction(root, 1); | ||
3396 | BUG_ON(!trans); | ||
3397 | |||
3398 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | ||
3399 | if (err) | ||
3400 | goto out; | ||
3401 | |||
3402 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | ||
3403 | BUG_ON(err); | ||
3404 | |||
3405 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
3406 | group->key.offset, 0, group->key.offset, | ||
3407 | 0, 0, 0); | ||
3408 | BUG_ON(err); | ||
3409 | |||
3410 | key.objectid = objectid; | ||
3411 | key.type = BTRFS_INODE_ITEM_KEY; | ||
3412 | key.offset = 0; | ||
3413 | inode = btrfs_iget(root->fs_info->sb, &key, root); | ||
3414 | BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); | ||
3415 | BTRFS_I(inode)->index_cnt = group->key.objectid; | ||
3416 | |||
3417 | err = btrfs_orphan_add(trans, inode); | ||
3418 | out: | ||
3419 | nr = trans->blocks_used; | ||
3420 | btrfs_end_transaction(trans, root); | ||
3421 | |||
3422 | btrfs_btree_balance_dirty(root, nr); | ||
3423 | if (err) { | ||
3424 | if (inode) | ||
3425 | iput(inode); | ||
3426 | inode = ERR_PTR(err); | ||
3427 | } | ||
3428 | return inode; | ||
3429 | } | ||
3430 | |||
3431 | /* | ||
3432 | * function to relocate all extents in a block group. | ||
3433 | */ | ||
3434 | int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | ||
3435 | { | ||
3436 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | ||
3437 | struct reloc_control *rc; | ||
3438 | int ret; | ||
3439 | int err = 0; | ||
3440 | |||
3441 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
3442 | if (!rc) | ||
3443 | return -ENOMEM; | ||
3444 | |||
3445 | mapping_tree_init(&rc->reloc_root_tree); | ||
3446 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3447 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3448 | |||
3449 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); | ||
3450 | BUG_ON(!rc->block_group); | ||
3451 | |||
3452 | btrfs_init_workers(&rc->workers, "relocate", | ||
3453 | fs_info->thread_pool_size); | ||
3454 | |||
3455 | rc->extent_root = extent_root; | ||
3456 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | ||
3457 | |||
3458 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); | ||
3459 | if (IS_ERR(rc->data_inode)) { | ||
3460 | err = PTR_ERR(rc->data_inode); | ||
3461 | rc->data_inode = NULL; | ||
3462 | goto out; | ||
3463 | } | ||
3464 | |||
3465 | printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", | ||
3466 | (unsigned long long)rc->block_group->key.objectid, | ||
3467 | (unsigned long long)rc->block_group->flags); | ||
3468 | |||
3469 | btrfs_start_delalloc_inodes(fs_info->tree_root); | ||
3470 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | ||
3471 | |||
3472 | while (1) { | ||
3473 | mutex_lock(&fs_info->cleaner_mutex); | ||
3474 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
3475 | mutex_unlock(&fs_info->cleaner_mutex); | ||
3476 | |||
3477 | rc->extents_found = 0; | ||
3478 | rc->extents_skipped = 0; | ||
3479 | |||
3480 | ret = relocate_block_group(rc); | ||
3481 | if (ret < 0) { | ||
3482 | err = ret; | ||
3483 | break; | ||
3484 | } | ||
3485 | |||
3486 | if (rc->extents_found == 0) | ||
3487 | break; | ||
3488 | |||
3489 | printk(KERN_INFO "btrfs: found %llu extents\n", | ||
3490 | (unsigned long long)rc->extents_found); | ||
3491 | |||
3492 | if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { | ||
3493 | btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); | ||
3494 | invalidate_mapping_pages(rc->data_inode->i_mapping, | ||
3495 | 0, -1); | ||
3496 | rc->stage = UPDATE_DATA_PTRS; | ||
3497 | } else if (rc->stage == UPDATE_DATA_PTRS && | ||
3498 | rc->extents_skipped >= rc->extents_found) { | ||
3499 | iput(rc->data_inode); | ||
3500 | rc->data_inode = create_reloc_inode(fs_info, | ||
3501 | rc->block_group); | ||
3502 | if (IS_ERR(rc->data_inode)) { | ||
3503 | err = PTR_ERR(rc->data_inode); | ||
3504 | rc->data_inode = NULL; | ||
3505 | break; | ||
3506 | } | ||
3507 | rc->stage = MOVE_DATA_EXTENTS; | ||
3508 | rc->found_file_extent = 0; | ||
3509 | } | ||
3510 | } | ||
3511 | |||
3512 | filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, | ||
3513 | rc->block_group->key.objectid, | ||
3514 | rc->block_group->key.objectid + | ||
3515 | rc->block_group->key.offset - 1); | ||
3516 | |||
3517 | WARN_ON(rc->block_group->pinned > 0); | ||
3518 | WARN_ON(rc->block_group->reserved > 0); | ||
3519 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); | ||
3520 | out: | ||
3521 | iput(rc->data_inode); | ||
3522 | btrfs_stop_workers(&rc->workers); | ||
3523 | btrfs_put_block_group(rc->block_group); | ||
3524 | kfree(rc); | ||
3525 | return err; | ||
3526 | } | ||
3527 | |||
3528 | /* | ||
3529 | * recover relocation interrupted by a system crash. | ||
3530 | * | ||
3531 | * this function resumes merging reloc trees with corresponding fs trees. | ||
3532 | * this is important for preserving the sharing of tree blocks | ||
3533 | */ | ||
3534 | int btrfs_recover_relocation(struct btrfs_root *root) | ||
3535 | { | ||
3536 | LIST_HEAD(reloc_roots); | ||
3537 | struct btrfs_key key; | ||
3538 | struct btrfs_root *fs_root; | ||
3539 | struct btrfs_root *reloc_root; | ||
3540 | struct btrfs_path *path; | ||
3541 | struct extent_buffer *leaf; | ||
3542 | struct reloc_control *rc = NULL; | ||
3543 | struct btrfs_trans_handle *trans; | ||
3544 | int ret; | ||
3545 | int err = 0; | ||
3546 | |||
3547 | path = btrfs_alloc_path(); | ||
3548 | if (!path) | ||
3549 | return -ENOMEM; | ||
3550 | |||
3551 | key.objectid = BTRFS_TREE_RELOC_OBJECTID; | ||
3552 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
3553 | key.offset = (u64)-1; | ||
3554 | |||
3555 | while (1) { | ||
3556 | ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, | ||
3557 | path, 0, 0); | ||
3558 | if (ret < 0) { | ||
3559 | err = ret; | ||
3560 | goto out; | ||
3561 | } | ||
3562 | if (ret > 0) { | ||
3563 | if (path->slots[0] == 0) | ||
3564 | break; | ||
3565 | path->slots[0]--; | ||
3566 | } | ||
3567 | leaf = path->nodes[0]; | ||
3568 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
3569 | btrfs_release_path(root->fs_info->tree_root, path); | ||
3570 | |||
3571 | if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || | ||
3572 | key.type != BTRFS_ROOT_ITEM_KEY) | ||
3573 | break; | ||
3574 | |||
3575 | reloc_root = btrfs_read_fs_root_no_radix(root, &key); | ||
3576 | if (IS_ERR(reloc_root)) { | ||
3577 | err = PTR_ERR(reloc_root); | ||
3578 | goto out; | ||
3579 | } | ||
3580 | |||
3581 | list_add(&reloc_root->root_list, &reloc_roots); | ||
3582 | |||
3583 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { | ||
3584 | fs_root = read_fs_root(root->fs_info, | ||
3585 | reloc_root->root_key.offset); | ||
3586 | if (IS_ERR(fs_root)) { | ||
3587 | err = PTR_ERR(fs_root); | ||
3588 | goto out; | ||
3589 | } | ||
3590 | } | ||
3591 | |||
3592 | if (key.offset == 0) | ||
3593 | break; | ||
3594 | |||
3595 | key.offset--; | ||
3596 | } | ||
3597 | btrfs_release_path(root->fs_info->tree_root, path); | ||
3598 | |||
3599 | if (list_empty(&reloc_roots)) | ||
3600 | goto out; | ||
3601 | |||
3602 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
3603 | if (!rc) { | ||
3604 | err = -ENOMEM; | ||
3605 | goto out; | ||
3606 | } | ||
3607 | |||
3608 | mapping_tree_init(&rc->reloc_root_tree); | ||
3609 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3610 | btrfs_init_workers(&rc->workers, "relocate", | ||
3611 | root->fs_info->thread_pool_size); | ||
3612 | rc->extent_root = root->fs_info->extent_root; | ||
3613 | |||
3614 | set_reloc_control(rc); | ||
3615 | |||
3616 | while (!list_empty(&reloc_roots)) { | ||
3617 | reloc_root = list_entry(reloc_roots.next, | ||
3618 | struct btrfs_root, root_list); | ||
3619 | list_del(&reloc_root->root_list); | ||
3620 | |||
3621 | if (btrfs_root_refs(&reloc_root->root_item) == 0) { | ||
3622 | list_add_tail(&reloc_root->root_list, | ||
3623 | &rc->reloc_roots); | ||
3624 | continue; | ||
3625 | } | ||
3626 | |||
3627 | fs_root = read_fs_root(root->fs_info, | ||
3628 | reloc_root->root_key.offset); | ||
3629 | BUG_ON(IS_ERR(fs_root)); | ||
3630 | |||
3631 | __add_reloc_root(reloc_root); | ||
3632 | fs_root->reloc_root = reloc_root; | ||
3633 | } | ||
3634 | |||
3635 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3636 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3637 | |||
3638 | merge_reloc_roots(rc); | ||
3639 | |||
3640 | unset_reloc_control(rc); | ||
3641 | |||
3642 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3643 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3644 | out: | ||
3645 | if (rc) { | ||
3646 | btrfs_stop_workers(&rc->workers); | ||
3647 | kfree(rc); | ||
3648 | } | ||
3649 | while (!list_empty(&reloc_roots)) { | ||
3650 | reloc_root = list_entry(reloc_roots.next, | ||
3651 | struct btrfs_root, root_list); | ||
3652 | list_del(&reloc_root->root_list); | ||
3653 | free_extent_buffer(reloc_root->node); | ||
3654 | free_extent_buffer(reloc_root->commit_root); | ||
3655 | kfree(reloc_root); | ||
3656 | } | ||
3657 | btrfs_free_path(path); | ||
3658 | |||
3659 | if (err == 0) { | ||
3660 | /* clean up the orphan inode in the data relocation tree */ | ||
3661 | fs_root = read_fs_root(root->fs_info, | ||
3662 | BTRFS_DATA_RELOC_TREE_OBJECTID); | ||
3663 | if (IS_ERR(fs_root)) | ||
3664 | err = PTR_ERR(fs_root); | ||
3665 | } | ||
3666 | return err; | ||
3667 | } | ||
3668 | |||
3669 | /* | ||
3670 | * helper to add ordered checksums for data relocation. | ||
3671 | * | ||
3672 | * cloning the existing checksums properly handles nodatasum extents. | ||
3673 | * it also saves the CPU time of re-calculating the checksums. | ||
3674 | */ | ||
3675 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | ||
3676 | { | ||
3677 | struct btrfs_ordered_sum *sums; | ||
3678 | struct btrfs_sector_sum *sector_sum; | ||
3679 | struct btrfs_ordered_extent *ordered; | ||
3680 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3681 | size_t offset; | ||
3682 | int ret; | ||
3683 | u64 disk_bytenr; | ||
3684 | LIST_HEAD(list); | ||
3685 | |||
3686 | ordered = btrfs_lookup_ordered_extent(inode, file_pos); | ||
3687 | BUG_ON(ordered->file_offset != file_pos || ordered->len != len); | ||
3688 | |||
3689 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; | ||
3690 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, | ||
3691 | disk_bytenr + len - 1, &list); | ||
3692 | |||
3693 | while (!list_empty(&list)) { | ||
3694 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | ||
3695 | list_del_init(&sums->list); | ||
3696 | |||
3697 | sector_sum = sums->sums; | ||
3698 | sums->bytenr = ordered->start; | ||
3699 | |||
3700 | offset = 0; | ||
3701 | while (offset < sums->len) { | ||
3702 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
3703 | sector_sum++; | ||
3704 | offset += root->sectorsize; | ||
3705 | } | ||
3706 | |||
3707 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
3708 | } | ||
3709 | btrfs_put_ordered_extent(ordered); | ||
3710 | return 0; | ||
3711 | } | ||
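The rebasing loop in btrfs_reloc_clone_csums() keeps the checksum bytes themselves and only shifts each per-sector bytenr by the distance the extent moved, ordered->start - disk_bytenr. A toy illustration of that arithmetic with invented numbers:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t sectorsize  = 4096;
            uint64_t disk_bytenr = 1048576;         /* old extent location */
            uint64_t new_start   = 8388608;         /* ordered extent start */
            uint64_t len         = 3 * sectorsize;

            for (uint64_t off = 0; off < len; off += sectorsize) {
                    uint64_t old_sec = disk_bytenr + off;
                    uint64_t new_sec = old_sec + (new_start - disk_bytenr);
                    printf("csum of sector %llu now describes %llu\n",
                           (unsigned long long)old_sec,
                           (unsigned long long)new_sec);
            }
            return 0;
    }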
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index b48650de4472..0ddc6d61c55a 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -111,6 +111,15 @@ out: | |||
111 | return ret; | 111 | return ret; |
112 | } | 112 | } |
113 | 113 | ||
114 | int btrfs_set_root_node(struct btrfs_root_item *item, | ||
115 | struct extent_buffer *node) | ||
116 | { | ||
117 | btrfs_set_root_bytenr(item, node->start); | ||
118 | btrfs_set_root_level(item, btrfs_header_level(node)); | ||
119 | btrfs_set_root_generation(item, btrfs_header_generation(node)); | ||
120 | return 0; | ||
121 | } | ||
122 | |||
114 | /* | 123 | /* |
115 | * copy the data in 'item' into the btree | 124 | * copy the data in 'item' into the btree |
116 | */ | 125 | */ |
@@ -164,8 +173,7 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
164 | * offset lower than the latest root. They need to be queued for deletion to | 173 | * offset lower than the latest root. They need to be queued for deletion to |
165 | * finish what was happening when we crashed. | 174 | * finish what was happening when we crashed. |
166 | */ | 175 | */ |
167 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, | 176 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) |
168 | struct btrfs_root *latest) | ||
169 | { | 177 | { |
170 | struct btrfs_root *dead_root; | 178 | struct btrfs_root *dead_root; |
171 | struct btrfs_item *item; | 179 | struct btrfs_item *item; |
@@ -227,10 +235,7 @@ again: | |||
227 | goto err; | 235 | goto err; |
228 | } | 236 | } |
229 | 237 | ||
230 | if (objectid == BTRFS_TREE_RELOC_OBJECTID) | 238 | ret = btrfs_add_dead_root(dead_root); |
231 | ret = btrfs_add_dead_reloc_root(dead_root); | ||
232 | else | ||
233 | ret = btrfs_add_dead_root(dead_root, latest); | ||
234 | if (ret) | 239 | if (ret) |
235 | goto err; | 240 | goto err; |
236 | goto again; | 241 | goto again; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2ff7cd2db25f..9f179d4832d5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -52,7 +52,6 @@ | |||
52 | #include "export.h" | 52 | #include "export.h" |
53 | #include "compression.h" | 53 | #include "compression.h" |
54 | 54 | ||
55 | |||
56 | static struct super_operations btrfs_super_ops; | 55 | static struct super_operations btrfs_super_ops; |
57 | 56 | ||
58 | static void btrfs_put_super(struct super_block *sb) | 57 | static void btrfs_put_super(struct super_block *sb) |
@@ -67,8 +66,8 @@ static void btrfs_put_super(struct super_block *sb) | |||
67 | enum { | 66 | enum { |
68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 67 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 68 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, | 69 | Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, |
71 | Opt_ratio, Opt_flushoncommit, Opt_err, | 70 | Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err, |
72 | }; | 71 | }; |
73 | 72 | ||
74 | static match_table_t tokens = { | 73 | static match_table_t tokens = { |
@@ -84,6 +83,8 @@ static match_table_t tokens = { | |||
84 | {Opt_thread_pool, "thread_pool=%d"}, | 83 | {Opt_thread_pool, "thread_pool=%d"}, |
85 | {Opt_compress, "compress"}, | 84 | {Opt_compress, "compress"}, |
86 | {Opt_ssd, "ssd"}, | 85 | {Opt_ssd, "ssd"}, |
86 | {Opt_ssd_spread, "ssd_spread"}, | ||
87 | {Opt_nossd, "nossd"}, | ||
87 | {Opt_noacl, "noacl"}, | 88 | {Opt_noacl, "noacl"}, |
88 | {Opt_notreelog, "notreelog"}, | 89 | {Opt_notreelog, "notreelog"}, |
89 | {Opt_flushoncommit, "flushoncommit"}, | 90 | {Opt_flushoncommit, "flushoncommit"}, |
@@ -158,7 +159,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
158 | */ | 159 | */ |
159 | break; | 160 | break; |
160 | case Opt_nodatasum: | 161 | case Opt_nodatasum: |
161 | printk(KERN_INFO "btrfs: setting nodatacsum\n"); | 162 | printk(KERN_INFO "btrfs: setting nodatasum\n"); |
162 | btrfs_set_opt(info->mount_opt, NODATASUM); | 163 | btrfs_set_opt(info->mount_opt, NODATASUM); |
163 | break; | 164 | break; |
164 | case Opt_nodatacow: | 165 | case Opt_nodatacow: |
@@ -174,6 +175,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
174 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 175 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
175 | btrfs_set_opt(info->mount_opt, SSD); | 176 | btrfs_set_opt(info->mount_opt, SSD); |
176 | break; | 177 | break; |
178 | case Opt_ssd_spread: | ||
179 | printk(KERN_INFO "btrfs: use spread ssd " | ||
180 | "allocation scheme\n"); | ||
181 | btrfs_set_opt(info->mount_opt, SSD); | ||
182 | btrfs_set_opt(info->mount_opt, SSD_SPREAD); | ||
183 | break; | ||
184 | case Opt_nossd: | ||
185 | printk(KERN_INFO "btrfs: not using ssd allocation " | ||
186 | "scheme\n"); | ||
187 | btrfs_set_opt(info->mount_opt, NOSSD); | ||
188 | btrfs_clear_opt(info->mount_opt, SSD); | ||
189 | btrfs_clear_opt(info->mount_opt, SSD_SPREAD); | ||
190 | break; | ||
177 | case Opt_nobarrier: | 191 | case Opt_nobarrier: |
178 | printk(KERN_INFO "btrfs: turning off barriers\n"); | 192 | printk(KERN_INFO "btrfs: turning off barriers\n"); |
179 | btrfs_set_opt(info->mount_opt, NOBARRIER); | 193 | btrfs_set_opt(info->mount_opt, NOBARRIER); |
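The new ssd_spread and nossd options are not independent of ssd: ssd_spread implies ssd, and nossd clears both ssd flags. A user-space sketch of those interactions; the bit positions are chosen arbitrarily for illustration, not taken from the kernel headers:

    #include <assert.h>
    #include <string.h>

    #define OPT_SSD         (1u << 0)       /* illustrative bit values */
    #define OPT_SSD_SPREAD  (1u << 1)
    #define OPT_NOSSD       (1u << 2)

    static unsigned int apply_opt(unsigned int opts, const char *tok)
    {
            if (!strcmp(tok, "ssd")) {
                    opts |= OPT_SSD;
            } else if (!strcmp(tok, "ssd_spread")) {
                    opts |= OPT_SSD | OPT_SSD_SPREAD;
            } else if (!strcmp(tok, "nossd")) {
                    opts |= OPT_NOSSD;
                    opts &= ~(OPT_SSD | OPT_SSD_SPREAD);
            }
            return opts;
    }

    int main(void)
    {
            unsigned int o = 0;

            o = apply_opt(o, "ssd_spread");
            assert(o & OPT_SSD);            /* spread implies ssd */

            o = apply_opt(o, "nossd");      /* nossd clears both */
            assert(!(o & (OPT_SSD | OPT_SSD_SPREAD)));
            return 0;
    }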
@@ -322,7 +336,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
322 | struct dentry *root_dentry; | 336 | struct dentry *root_dentry; |
323 | struct btrfs_super_block *disk_super; | 337 | struct btrfs_super_block *disk_super; |
324 | struct btrfs_root *tree_root; | 338 | struct btrfs_root *tree_root; |
325 | struct btrfs_inode *bi; | 339 | struct btrfs_key key; |
326 | int err; | 340 | int err; |
327 | 341 | ||
328 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 342 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
@@ -341,23 +355,15 @@ static int btrfs_fill_super(struct super_block *sb, | |||
341 | } | 355 | } |
342 | sb->s_fs_info = tree_root; | 356 | sb->s_fs_info = tree_root; |
343 | disk_super = &tree_root->fs_info->super_copy; | 357 | disk_super = &tree_root->fs_info->super_copy; |
344 | inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID, | ||
345 | tree_root->fs_info->fs_root); | ||
346 | bi = BTRFS_I(inode); | ||
347 | bi->location.objectid = inode->i_ino; | ||
348 | bi->location.offset = 0; | ||
349 | bi->root = tree_root->fs_info->fs_root; | ||
350 | |||
351 | btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); | ||
352 | 358 | ||
353 | if (!inode) { | 359 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; |
354 | err = -ENOMEM; | 360 | key.type = BTRFS_INODE_ITEM_KEY; |
361 | key.offset = 0; | ||
362 | inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root); | ||
363 | if (IS_ERR(inode)) { | ||
364 | err = PTR_ERR(inode); | ||
355 | goto fail_close; | 365 | goto fail_close; |
356 | } | 366 | } |
357 | if (inode->i_state & I_NEW) { | ||
358 | btrfs_read_locked_inode(inode); | ||
359 | unlock_new_inode(inode); | ||
360 | } | ||
361 | 367 | ||
362 | root_dentry = d_alloc_root(inode); | 368 | root_dentry = d_alloc_root(inode); |
363 | if (!root_dentry) { | 369 | if (!root_dentry) { |
@@ -388,10 +394,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
388 | struct btrfs_root *root = btrfs_sb(sb); | 394 | struct btrfs_root *root = btrfs_sb(sb); |
389 | int ret; | 395 | int ret; |
390 | 396 | ||
391 | if (sb->s_flags & MS_RDONLY) | ||
392 | return 0; | ||
393 | |||
394 | sb->s_dirt = 0; | ||
395 | if (!wait) { | 397 | if (!wait) { |
396 | filemap_flush(root->fs_info->btree_inode->i_mapping); | 398 | filemap_flush(root->fs_info->btree_inode->i_mapping); |
397 | return 0; | 399 | return 0; |
@@ -402,7 +404,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
402 | 404 | ||
403 | trans = btrfs_start_transaction(root, 1); | 405 | trans = btrfs_start_transaction(root, 1); |
404 | ret = btrfs_commit_transaction(trans, root); | 406 | ret = btrfs_commit_transaction(trans, root); |
405 | sb->s_dirt = 0; | ||
406 | return ret; | 407 | return ret; |
407 | } | 408 | } |
408 | 409 | ||
@@ -433,7 +434,11 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
433 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | 434 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); |
434 | if (btrfs_test_opt(root, COMPRESS)) | 435 | if (btrfs_test_opt(root, COMPRESS)) |
435 | seq_puts(seq, ",compress"); | 436 | seq_puts(seq, ",compress"); |
436 | if (btrfs_test_opt(root, SSD)) | 437 | if (btrfs_test_opt(root, NOSSD)) |
438 | seq_puts(seq, ",nossd"); | ||
439 | if (btrfs_test_opt(root, SSD_SPREAD)) | ||
440 | seq_puts(seq, ",ssd_spread"); | ||
441 | else if (btrfs_test_opt(root, SSD)) | ||
437 | seq_puts(seq, ",ssd"); | 442 | seq_puts(seq, ",ssd"); |
438 | if (btrfs_test_opt(root, NOTREELOG)) | 443 | if (btrfs_test_opt(root, NOTREELOG)) |
439 | seq_puts(seq, ",notreelog"); | 444 | seq_puts(seq, ",notreelog"); |
@@ -444,11 +449,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
444 | return 0; | 449 | return 0; |
445 | } | 450 | } |
446 | 451 | ||
447 | static void btrfs_write_super(struct super_block *sb) | ||
448 | { | ||
449 | sb->s_dirt = 0; | ||
450 | } | ||
451 | |||
452 | static int btrfs_test_super(struct super_block *s, void *data) | 452 | static int btrfs_test_super(struct super_block *s, void *data) |
453 | { | 453 | { |
454 | struct btrfs_fs_devices *test_fs_devices = data; | 454 | struct btrfs_fs_devices *test_fs_devices = data; |
@@ -584,7 +584,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
584 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 584 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) |
585 | return -EINVAL; | 585 | return -EINVAL; |
586 | 586 | ||
587 | ret = btrfs_cleanup_reloc_trees(root); | 587 | /* recover relocation */ |
588 | ret = btrfs_recover_relocation(root); | ||
588 | WARN_ON(ret); | 589 | WARN_ON(ret); |
589 | 590 | ||
590 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 591 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
@@ -678,7 +679,6 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
678 | static struct super_operations btrfs_super_ops = { | 679 | static struct super_operations btrfs_super_ops = { |
679 | .delete_inode = btrfs_delete_inode, | 680 | .delete_inode = btrfs_delete_inode, |
680 | .put_super = btrfs_put_super, | 681 | .put_super = btrfs_put_super, |
681 | .write_super = btrfs_write_super, | ||
682 | .sync_fs = btrfs_sync_fs, | 682 | .sync_fs = btrfs_sync_fs, |
683 | .show_options = btrfs_show_options, | 683 | .show_options = btrfs_show_options, |
684 | .write_inode = btrfs_write_inode, | 684 | .write_inode = btrfs_write_inode, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 01b143605ec1..2e177d7f4bb9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include "disk-io.h" | 25 | #include "disk-io.h" |
26 | #include "transaction.h" | 26 | #include "transaction.h" |
27 | #include "locking.h" | 27 | #include "locking.h" |
28 | #include "ref-cache.h" | ||
29 | #include "tree-log.h" | 28 | #include "tree-log.h" |
30 | 29 | ||
31 | #define BTRFS_ROOT_TRANS_TAG 0 | 30 | #define BTRFS_ROOT_TRANS_TAG 0 |
@@ -94,45 +93,37 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
94 | * to make sure the old root from before we joined the transaction is deleted | 93 | * to make sure the old root from before we joined the transaction is deleted |
95 | * when the transaction commits | 94 | * when the transaction commits |
96 | */ | 95 | */ |
97 | noinline int btrfs_record_root_in_trans(struct btrfs_root *root) | 96 | static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, |
97 | struct btrfs_root *root) | ||
98 | { | 98 | { |
99 | struct btrfs_dirty_root *dirty; | 99 | if (root->ref_cows && root->last_trans < trans->transid) { |
100 | u64 running_trans_id = root->fs_info->running_transaction->transid; | ||
101 | if (root->ref_cows && root->last_trans < running_trans_id) { | ||
102 | WARN_ON(root == root->fs_info->extent_root); | 100 | WARN_ON(root == root->fs_info->extent_root); |
103 | if (root->root_item.refs != 0) { | 101 | WARN_ON(root->root_item.refs == 0); |
104 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 102 | WARN_ON(root->commit_root != root->node); |
105 | (unsigned long)root->root_key.objectid, | 103 | |
106 | BTRFS_ROOT_TRANS_TAG); | 104 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
107 | 105 | (unsigned long)root->root_key.objectid, | |
108 | dirty = kmalloc(sizeof(*dirty), GFP_NOFS); | 106 | BTRFS_ROOT_TRANS_TAG); |
109 | BUG_ON(!dirty); | 107 | root->last_trans = trans->transid; |
110 | dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); | 108 | btrfs_init_reloc_root(trans, root); |
111 | BUG_ON(!dirty->root); | 109 | } |
112 | dirty->latest_root = root; | 110 | return 0; |
113 | INIT_LIST_HEAD(&dirty->list); | 111 | } |
114 | |||
115 | root->commit_root = btrfs_root_node(root); | ||
116 | |||
117 | memcpy(dirty->root, root, sizeof(*root)); | ||
118 | spin_lock_init(&dirty->root->node_lock); | ||
119 | spin_lock_init(&dirty->root->list_lock); | ||
120 | mutex_init(&dirty->root->objectid_mutex); | ||
121 | mutex_init(&dirty->root->log_mutex); | ||
122 | INIT_LIST_HEAD(&dirty->root->dead_list); | ||
123 | dirty->root->node = root->commit_root; | ||
124 | dirty->root->commit_root = NULL; | ||
125 | 112 | ||
126 | spin_lock(&root->list_lock); | 113 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
127 | list_add(&dirty->root->dead_list, &root->dead_list); | 114 | struct btrfs_root *root) |
128 | spin_unlock(&root->list_lock); | 115 | { |
116 | if (!root->ref_cows) | ||
117 | return 0; | ||
129 | 118 | ||
130 | root->dirty_root = dirty; | 119 | mutex_lock(&root->fs_info->trans_mutex); |
131 | } else { | 120 | if (root->last_trans == trans->transid) { |
132 | WARN_ON(1); | 121 | mutex_unlock(&root->fs_info->trans_mutex); |
133 | } | 122 | return 0; |
134 | root->last_trans = running_trans_id; | ||
135 | } | 123 | } |
124 | |||
125 | record_root_in_trans(trans, root); | ||
126 | mutex_unlock(&root->fs_info->trans_mutex); | ||
136 | return 0; | 127 | return 0; |
137 | } | 128 | } |
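The rewritten btrfs_record_root_in_trans() relies on a once-per-transaction guard: under trans_mutex, a root whose last_trans already equals the running transid is skipped, so the radix-tree tagging and reloc-root setup happen at most once per transaction. A minimal sketch of that idiom, with the kernel details stubbed out:

    #include <assert.h>
    #include <stdint.h>

    struct root { uint64_t last_trans; int recorded; };

    /* in the kernel this check runs under fs_info->trans_mutex */
    static void record_root(struct root *r, uint64_t transid)
    {
            if (r->last_trans == transid)
                    return;                 /* already recorded */
            r->last_trans = transid;
            r->recorded++;                  /* tag in radix tree, etc. */
    }

    int main(void)
    {
            struct root r = { 0, 0 };

            record_root(&r, 7);
            record_root(&r, 7);             /* second call is a no-op */
            assert(r.recorded == 1);

            record_root(&r, 8);             /* new transaction records again */
            assert(r.recorded == 2);
            return 0;
    }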
138 | 129 | ||
@@ -181,7 +172,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
181 | ret = join_transaction(root); | 172 | ret = join_transaction(root); |
182 | BUG_ON(ret); | 173 | BUG_ON(ret); |
183 | 174 | ||
184 | btrfs_record_root_in_trans(root); | ||
185 | h->transid = root->fs_info->running_transaction->transid; | 175 | h->transid = root->fs_info->running_transaction->transid; |
186 | h->transaction = root->fs_info->running_transaction; | 176 | h->transaction = root->fs_info->running_transaction; |
187 | h->blocks_reserved = num_blocks; | 177 | h->blocks_reserved = num_blocks; |
@@ -192,6 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
192 | h->delayed_ref_updates = 0; | 182 | h->delayed_ref_updates = 0; |
193 | 183 | ||
194 | root->fs_info->running_transaction->use_count++; | 184 | root->fs_info->running_transaction->use_count++; |
185 | record_root_in_trans(h, root); | ||
195 | mutex_unlock(&root->fs_info->trans_mutex); | 186 | mutex_unlock(&root->fs_info->trans_mutex); |
196 | return h; | 187 | return h; |
197 | } | 188 | } |
@@ -233,6 +224,7 @@ static noinline int wait_for_commit(struct btrfs_root *root, | |||
233 | return 0; | 224 | return 0; |
234 | } | 225 | } |
235 | 226 | ||
227 | #if 0 | ||
236 | /* | 228 | /* |
237 | * rate limit against the drop_snapshot code. This helps to slow down new | 229 | * rate limit against the drop_snapshot code. This helps to slow down new |
238 | * operations if the drop_snapshot code isn't able to keep up. | 230 | * operations if the drop_snapshot code isn't able to keep up. |
@@ -273,6 +265,7 @@ harder: | |||
273 | goto harder; | 265 | goto harder; |
274 | } | 266 | } |
275 | } | 267 | } |
268 | #endif | ||
276 | 269 | ||
277 | void btrfs_throttle(struct btrfs_root *root) | 270 | void btrfs_throttle(struct btrfs_root *root) |
278 | { | 271 | { |
@@ -280,7 +273,6 @@ void btrfs_throttle(struct btrfs_root *root) | |||
280 | if (!root->fs_info->open_ioctl_trans) | 273 | if (!root->fs_info->open_ioctl_trans) |
281 | wait_current_trans(root); | 274 | wait_current_trans(root); |
282 | mutex_unlock(&root->fs_info->trans_mutex); | 275 | mutex_unlock(&root->fs_info->trans_mutex); |
283 | throttle_on_drops(root); | ||
284 | } | 276 | } |
285 | 277 | ||
286 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 278 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
@@ -323,9 +315,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
323 | memset(trans, 0, sizeof(*trans)); | 315 | memset(trans, 0, sizeof(*trans)); |
324 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 316 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
325 | 317 | ||
326 | if (throttle) | ||
327 | throttle_on_drops(root); | ||
328 | |||
329 | return 0; | 318 | return 0; |
330 | } | 319 | } |
331 | 320 | ||
@@ -462,12 +451,8 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
462 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 451 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
463 | if (old_root_bytenr == root->node->start) | 452 | if (old_root_bytenr == root->node->start) |
464 | break; | 453 | break; |
465 | btrfs_set_root_bytenr(&root->root_item, | ||
466 | root->node->start); | ||
467 | btrfs_set_root_level(&root->root_item, | ||
468 | btrfs_header_level(root->node)); | ||
469 | btrfs_set_root_generation(&root->root_item, trans->transid); | ||
470 | 454 | ||
455 | btrfs_set_root_node(&root->root_item, root->node); | ||
471 | ret = btrfs_update_root(trans, tree_root, | 456 | ret = btrfs_update_root(trans, tree_root, |
472 | &root->root_key, | 457 | &root->root_key, |
473 | &root->root_item); | 458 | &root->root_item); |
@@ -477,14 +462,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
477 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 462 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
478 | BUG_ON(ret); | 463 | BUG_ON(ret); |
479 | } | 464 | } |
465 | free_extent_buffer(root->commit_root); | ||
466 | root->commit_root = btrfs_root_node(root); | ||
480 | return 0; | 467 | return 0; |
481 | } | 468 | } |
482 | 469 | ||
483 | /* | 470 | /* |
484 | * update all the cowonly tree roots on disk | 471 | * update all the cowonly tree roots on disk |
485 | */ | 472 | */ |
486 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 473 | static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, |
487 | struct btrfs_root *root) | 474 | struct btrfs_root *root) |
488 | { | 475 | { |
489 | struct btrfs_fs_info *fs_info = root->fs_info; | 476 | struct btrfs_fs_info *fs_info = root->fs_info; |
490 | struct list_head *next; | 477 | struct list_head *next; |
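
update_cowonly_root() used to copy bytenr, level and generation into the root item one setter at a time; the hunks above fold the three into a single btrfs_set_root_node() call and cache the committed root with btrfs_root_node(). A small sketch of why such a consolidating helper is worth having, with made-up struct layouts (the real btrfs root item is an on-disk structure with endian-aware accessors):

#include <stdio.h>

/* Illustrative stand-ins -- not the on-disk btrfs structures. */
struct node {
    unsigned long long start;       /* block number of the root node */
    int level;
    unsigned long long generation;
};

struct root_item {
    unsigned long long bytenr;
    int level;
    unsigned long long generation;
};

/* One helper keeps all three fields in sync, so a caller can no
 * longer update bytenr but forget level or generation. */
static void set_root_node(struct root_item *item, const struct node *n)
{
    item->bytenr = n->start;
    item->level = n->level;
    item->generation = n->generation;
}

int main(void)
{
    struct node n = { .start = 4096, .level = 1, .generation = 42 };
    struct root_item item;

    set_root_node(&item, &n);
    printf("root at %llu, level %d, gen %llu\n",
           item.bytenr, item.level, item.generation);
    return 0;
}
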
@@ -520,118 +507,54 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
520 | * a dirty root struct and adds it into the list of dead roots that need to | 507 | * a dirty root struct and adds it into the list of dead roots that need to |
521 | * be deleted | 508 | * be deleted |
522 | */ | 509 | */ |
523 | int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) | 510 | int btrfs_add_dead_root(struct btrfs_root *root) |
524 | { | 511 | { |
525 | struct btrfs_dirty_root *dirty; | ||
526 | |||
527 | dirty = kmalloc(sizeof(*dirty), GFP_NOFS); | ||
528 | if (!dirty) | ||
529 | return -ENOMEM; | ||
530 | dirty->root = root; | ||
531 | dirty->latest_root = latest; | ||
532 | |||
533 | mutex_lock(&root->fs_info->trans_mutex); | 512 | mutex_lock(&root->fs_info->trans_mutex); |
534 | list_add(&dirty->list, &latest->fs_info->dead_roots); | 513 | list_add(&root->root_list, &root->fs_info->dead_roots); |
535 | mutex_unlock(&root->fs_info->trans_mutex); | 514 | mutex_unlock(&root->fs_info->trans_mutex); |
536 | return 0; | 515 | return 0; |
537 | } | 516 | } |
538 | 517 | ||
539 | /* | 518 | /* |
540 | * at transaction commit time we need to schedule the old roots for | 519 | * update all the fs tree roots on disk |
541 | * deletion via btrfs_drop_snapshot. This runs through all the | ||
542 | * reference counted roots that were modified in the current | ||
543 | * transaction and puts them into the drop list | ||
544 | */ | 520 | */ |
545 | static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, | 521 | static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, |
546 | struct radix_tree_root *radix, | 522 | struct btrfs_root *root) |
547 | struct list_head *list) | ||
548 | { | 523 | { |
549 | struct btrfs_dirty_root *dirty; | ||
550 | struct btrfs_root *gang[8]; | 524 | struct btrfs_root *gang[8]; |
551 | struct btrfs_root *root; | 525 | struct btrfs_fs_info *fs_info = root->fs_info; |
552 | int i; | 526 | int i; |
553 | int ret; | 527 | int ret; |
554 | int err = 0; | 528 | int err = 0; |
555 | u32 refs; | ||
556 | 529 | ||
557 | while (1) { | 530 | while (1) { |
558 | ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, | 531 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, |
532 | (void **)gang, 0, | ||
559 | ARRAY_SIZE(gang), | 533 | ARRAY_SIZE(gang), |
560 | BTRFS_ROOT_TRANS_TAG); | 534 | BTRFS_ROOT_TRANS_TAG); |
561 | if (ret == 0) | 535 | if (ret == 0) |
562 | break; | 536 | break; |
563 | for (i = 0; i < ret; i++) { | 537 | for (i = 0; i < ret; i++) { |
564 | root = gang[i]; | 538 | root = gang[i]; |
565 | radix_tree_tag_clear(radix, | 539 | radix_tree_tag_clear(&fs_info->fs_roots_radix, |
566 | (unsigned long)root->root_key.objectid, | 540 | (unsigned long)root->root_key.objectid, |
567 | BTRFS_ROOT_TRANS_TAG); | 541 | BTRFS_ROOT_TRANS_TAG); |
568 | |||
569 | BUG_ON(!root->ref_tree); | ||
570 | dirty = root->dirty_root; | ||
571 | 542 | ||
572 | btrfs_free_log(trans, root); | 543 | btrfs_free_log(trans, root); |
573 | btrfs_free_reloc_root(trans, root); | 544 | btrfs_update_reloc_root(trans, root); |
574 | |||
575 | if (root->commit_root == root->node) { | ||
576 | WARN_ON(root->node->start != | ||
577 | btrfs_root_bytenr(&root->root_item)); | ||
578 | |||
579 | free_extent_buffer(root->commit_root); | ||
580 | root->commit_root = NULL; | ||
581 | root->dirty_root = NULL; | ||
582 | |||
583 | spin_lock(&root->list_lock); | ||
584 | list_del_init(&dirty->root->dead_list); | ||
585 | spin_unlock(&root->list_lock); | ||
586 | 545 | ||
587 | kfree(dirty->root); | 546 | if (root->commit_root == root->node) |
588 | kfree(dirty); | ||
589 | |||
590 | /* make sure to update the root on disk | ||
591 | * so we get any updates to the block used | ||
592 | * counts | ||
593 | */ | ||
594 | err = btrfs_update_root(trans, | ||
595 | root->fs_info->tree_root, | ||
596 | &root->root_key, | ||
597 | &root->root_item); | ||
598 | continue; | 547 | continue; |
599 | } | ||
600 | 548 | ||
601 | memset(&root->root_item.drop_progress, 0, | 549 | free_extent_buffer(root->commit_root); |
602 | sizeof(struct btrfs_disk_key)); | 550 | root->commit_root = btrfs_root_node(root); |
603 | root->root_item.drop_level = 0; | 551 | |
604 | root->commit_root = NULL; | 552 | btrfs_set_root_node(&root->root_item, root->node); |
605 | root->dirty_root = NULL; | 553 | err = btrfs_update_root(trans, fs_info->tree_root, |
606 | root->root_key.offset = root->fs_info->generation; | ||
607 | btrfs_set_root_bytenr(&root->root_item, | ||
608 | root->node->start); | ||
609 | btrfs_set_root_level(&root->root_item, | ||
610 | btrfs_header_level(root->node)); | ||
611 | btrfs_set_root_generation(&root->root_item, | ||
612 | root->root_key.offset); | ||
613 | |||
614 | err = btrfs_insert_root(trans, root->fs_info->tree_root, | ||
615 | &root->root_key, | 554 | &root->root_key, |
616 | &root->root_item); | 555 | &root->root_item); |
617 | if (err) | 556 | if (err) |
618 | break; | 557 | break; |
619 | |||
620 | refs = btrfs_root_refs(&dirty->root->root_item); | ||
621 | btrfs_set_root_refs(&dirty->root->root_item, refs - 1); | ||
622 | err = btrfs_update_root(trans, root->fs_info->tree_root, | ||
623 | &dirty->root->root_key, | ||
624 | &dirty->root->root_item); | ||
625 | |||
626 | BUG_ON(err); | ||
627 | if (refs == 1) { | ||
628 | list_add(&dirty->list, list); | ||
629 | } else { | ||
630 | WARN_ON(1); | ||
631 | free_extent_buffer(dirty->root->node); | ||
632 | kfree(dirty->root); | ||
633 | kfree(dirty); | ||
634 | } | ||
635 | } | 558 | } |
636 | } | 559 | } |
637 | return err; | 560 | return err; |
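
commit_fs_roots() drains the dirty roots with the classic tagged gang-lookup loop: fetch up to eight tagged entries, clear each tag before processing so a later modification re-tags the root, and stop when a lookup returns nothing. A userspace approximation of that control flow, with a plain array standing in for the radix tree and all names invented for the example:

#include <stdio.h>

#define NROOTS 20
#define GANG    8

static int tagged[NROOTS];      /* 1 == root modified in this transaction */

/* Collect up to 'max' tagged indices; a crude stand-in for
 * radix_tree_gang_lookup_tag(), good enough for the control flow. */
static int gang_lookup_tag(int *gang, int max)
{
    int i, n = 0;

    for (i = 0; i < NROOTS && n < max; i++)
        if (tagged[i])
            gang[n++] = i;
    return n;
}

int main(void)
{
    int gang[GANG];
    int i, ret;

    tagged[3] = tagged[7] = tagged[15] = 1;

    while ((ret = gang_lookup_tag(gang, GANG)) != 0) {
        for (i = 0; i < ret; i++) {
            tagged[gang[i]] = 0;                      /* clear the tag first... */
            printf("committing root %d\n", gang[i]);  /* ...then do the work */
        }
    }
    return 0;
}
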
@@ -688,12 +611,8 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | |||
688 | TASK_UNINTERRUPTIBLE); | 611 | TASK_UNINTERRUPTIBLE); |
689 | mutex_unlock(&info->trans_mutex); | 612 | mutex_unlock(&info->trans_mutex); |
690 | 613 | ||
691 | atomic_dec(&info->throttles); | ||
692 | wake_up(&info->transaction_throttle); | ||
693 | |||
694 | schedule(); | 614 | schedule(); |
695 | 615 | ||
696 | atomic_inc(&info->throttles); | ||
697 | mutex_lock(&info->trans_mutex); | 616 | mutex_lock(&info->trans_mutex); |
698 | finish_wait(&info->transaction_wait, &wait); | 617 | finish_wait(&info->transaction_wait, &wait); |
699 | } | 618 | } |
@@ -705,111 +624,61 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | |||
705 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | 624 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on |
706 | * all of them | 625 | * all of them |
707 | */ | 626 | */ |
708 | static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | 627 | int btrfs_drop_dead_root(struct btrfs_root *root) |
709 | struct list_head *list) | ||
710 | { | 628 | { |
711 | struct btrfs_dirty_root *dirty; | ||
712 | struct btrfs_trans_handle *trans; | 629 | struct btrfs_trans_handle *trans; |
630 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
713 | unsigned long nr; | 631 | unsigned long nr; |
714 | u64 num_bytes; | 632 | int ret; |
715 | u64 bytes_used; | ||
716 | u64 max_useless; | ||
717 | int ret = 0; | ||
718 | int err; | ||
719 | |||
720 | while (!list_empty(list)) { | ||
721 | struct btrfs_root *root; | ||
722 | |||
723 | dirty = list_entry(list->prev, struct btrfs_dirty_root, list); | ||
724 | list_del_init(&dirty->list); | ||
725 | |||
726 | num_bytes = btrfs_root_used(&dirty->root->root_item); | ||
727 | root = dirty->latest_root; | ||
728 | atomic_inc(&root->fs_info->throttles); | ||
729 | |||
730 | while (1) { | ||
731 | /* | ||
732 | * we don't want to jump in and create a bunch of | ||
733 | * delayed refs if the transaction is starting to close | ||
734 | */ | ||
735 | wait_transaction_pre_flush(tree_root->fs_info); | ||
736 | trans = btrfs_start_transaction(tree_root, 1); | ||
737 | |||
738 | /* | ||
739 | * we've joined a transaction, make sure it isn't | ||
740 | * closing right now | ||
741 | */ | ||
742 | if (trans->transaction->delayed_refs.flushing) { | ||
743 | btrfs_end_transaction(trans, tree_root); | ||
744 | continue; | ||
745 | } | ||
746 | |||
747 | mutex_lock(&root->fs_info->drop_mutex); | ||
748 | ret = btrfs_drop_snapshot(trans, dirty->root); | ||
749 | if (ret != -EAGAIN) | ||
750 | break; | ||
751 | mutex_unlock(&root->fs_info->drop_mutex); | ||
752 | 633 | ||
753 | err = btrfs_update_root(trans, | 634 | while (1) { |
754 | tree_root, | 635 | /* |
755 | &dirty->root->root_key, | 636 | * we don't want to jump in and create a bunch of |
756 | &dirty->root->root_item); | 637 | * delayed refs if the transaction is starting to close |
757 | if (err) | 638 | */ |
758 | ret = err; | 639 | wait_transaction_pre_flush(tree_root->fs_info); |
759 | nr = trans->blocks_used; | 640 | trans = btrfs_start_transaction(tree_root, 1); |
760 | ret = btrfs_end_transaction(trans, tree_root); | ||
761 | BUG_ON(ret); | ||
762 | 641 | ||
763 | btrfs_btree_balance_dirty(tree_root, nr); | 642 | /* |
764 | cond_resched(); | 643 | * we've joined a transaction, make sure it isn't |
644 | * closing right now | ||
645 | */ | ||
646 | if (trans->transaction->delayed_refs.flushing) { | ||
647 | btrfs_end_transaction(trans, tree_root); | ||
648 | continue; | ||
765 | } | 649 | } |
766 | BUG_ON(ret); | ||
767 | atomic_dec(&root->fs_info->throttles); | ||
768 | wake_up(&root->fs_info->transaction_throttle); | ||
769 | 650 | ||
770 | num_bytes -= btrfs_root_used(&dirty->root->root_item); | 651 | ret = btrfs_drop_snapshot(trans, root); |
771 | bytes_used = btrfs_root_used(&root->root_item); | 652 | if (ret != -EAGAIN) |
772 | if (num_bytes) { | 653 | break; |
773 | mutex_lock(&root->fs_info->trans_mutex); | ||
774 | btrfs_record_root_in_trans(root); | ||
775 | mutex_unlock(&root->fs_info->trans_mutex); | ||
776 | btrfs_set_root_used(&root->root_item, | ||
777 | bytes_used - num_bytes); | ||
778 | } | ||
779 | 654 | ||
780 | ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); | 655 | ret = btrfs_update_root(trans, tree_root, |
781 | if (ret) { | 656 | &root->root_key, |
782 | BUG(); | 657 | &root->root_item); |
658 | if (ret) | ||
783 | break; | 659 | break; |
784 | } | ||
785 | mutex_unlock(&root->fs_info->drop_mutex); | ||
786 | |||
787 | spin_lock(&root->list_lock); | ||
788 | list_del_init(&dirty->root->dead_list); | ||
789 | if (!list_empty(&root->dead_list)) { | ||
790 | struct btrfs_root *oldest; | ||
791 | oldest = list_entry(root->dead_list.prev, | ||
792 | struct btrfs_root, dead_list); | ||
793 | max_useless = oldest->root_key.offset - 1; | ||
794 | } else { | ||
795 | max_useless = root->root_key.offset - 1; | ||
796 | } | ||
797 | spin_unlock(&root->list_lock); | ||
798 | 660 | ||
799 | nr = trans->blocks_used; | 661 | nr = trans->blocks_used; |
800 | ret = btrfs_end_transaction(trans, tree_root); | 662 | ret = btrfs_end_transaction(trans, tree_root); |
801 | BUG_ON(ret); | 663 | BUG_ON(ret); |
802 | 664 | ||
803 | ret = btrfs_remove_leaf_refs(root, max_useless, 0); | ||
804 | BUG_ON(ret); | ||
805 | |||
806 | free_extent_buffer(dirty->root->node); | ||
807 | kfree(dirty->root); | ||
808 | kfree(dirty); | ||
809 | |||
810 | btrfs_btree_balance_dirty(tree_root, nr); | 665 | btrfs_btree_balance_dirty(tree_root, nr); |
811 | cond_resched(); | 666 | cond_resched(); |
812 | } | 667 | } |
668 | BUG_ON(ret); | ||
669 | |||
670 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | ||
671 | BUG_ON(ret); | ||
672 | |||
673 | nr = trans->blocks_used; | ||
674 | ret = btrfs_end_transaction(trans, tree_root); | ||
675 | BUG_ON(ret); | ||
676 | |||
677 | free_extent_buffer(root->node); | ||
678 | free_extent_buffer(root->commit_root); | ||
679 | kfree(root); | ||
680 | |||
681 | btrfs_btree_balance_dirty(tree_root, nr); | ||
813 | return ret; | 682 | return ret; |
814 | } | 683 | } |
815 | 684 | ||
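
btrfs_drop_dead_root() now deletes a snapshot in bounded slices: each pass joins a fresh transaction, backs off while the transaction is closing, does a chunk of work, and loops for as long as the drop reports -EAGAIN. The control flow in miniature; the kernel steps that cannot run in userspace are left as comments:

#include <errno.h>
#include <stdio.h>

static int items_left = 10;     /* the snapshot's remaining items */

/* Stands in for btrfs_drop_snapshot(): frees a bounded batch per
 * call and reports -EAGAIN while anything is left. */
static int drop_some(void)
{
    int batch = 4;

    while (items_left > 0 && batch-- > 0)
        items_left--;
    printf("pass done, %d items left\n", items_left);
    return items_left ? -EAGAIN : 0;
}

int main(void)
{
    int ret;

    do {
        /* kernel: start a transaction; retry if it is closing */
        ret = drop_some();
        /* kernel: save progress in the root item, end the
         * transaction, balance dirty pages, cond_resched() */
    } while (ret == -EAGAIN);

    printf("root fully dropped (ret=%d)\n", ret);
    return 0;
}
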
@@ -839,24 +708,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
839 | if (ret) | 708 | if (ret) |
840 | goto fail; | 709 | goto fail; |
841 | 710 | ||
842 | btrfs_record_root_in_trans(root); | 711 | record_root_in_trans(trans, root); |
843 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 712 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
844 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 713 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
845 | 714 | ||
846 | key.objectid = objectid; | 715 | key.objectid = objectid; |
847 | key.offset = trans->transid; | 716 | key.offset = 0; |
848 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 717 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
849 | 718 | ||
850 | old = btrfs_lock_root_node(root); | 719 | old = btrfs_lock_root_node(root); |
851 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 720 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
721 | btrfs_set_lock_blocking(old); | ||
852 | 722 | ||
853 | btrfs_copy_root(trans, root, old, &tmp, objectid); | 723 | btrfs_copy_root(trans, root, old, &tmp, objectid); |
854 | btrfs_tree_unlock(old); | 724 | btrfs_tree_unlock(old); |
855 | free_extent_buffer(old); | 725 | free_extent_buffer(old); |
856 | 726 | ||
857 | btrfs_set_root_bytenr(new_root_item, tmp->start); | 727 | btrfs_set_root_node(new_root_item, tmp); |
858 | btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); | ||
859 | btrfs_set_root_generation(new_root_item, trans->transid); | ||
860 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 728 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, |
861 | new_root_item); | 729 | new_root_item); |
862 | btrfs_tree_unlock(tmp); | 730 | btrfs_tree_unlock(tmp); |
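
create_pending_snapshot() COWs the source root node and inserts the copy under the new objectid, so the snapshot initially shares every lower node with the source tree. The toy below snapshots a persistent binary tree the same way: only the root is duplicated, and children stay shared until a write would copy them (which the toy does not implement):

#include <stdio.h>
#include <stdlib.h>

struct tnode {
    int key;
    struct tnode *left, *right;
};

/* Snapshot = a shallow copy of the root.  Both roots now reach the
 * same descendants; a real COW tree copies a lower node only when
 * it is modified through one of the roots. */
static struct tnode *snapshot(const struct tnode *root)
{
    struct tnode *copy = malloc(sizeof(*copy));

    *copy = *root;
    return copy;
}

int main(void)
{
    struct tnode leaf = { .key = 2 };
    struct tnode root = { .key = 1, .left = &leaf };
    struct tnode *snap = snapshot(&root);

    printf("snapshot shares the child: %s\n",
           snap->left == root.left ? "yes" : "no");
    free(snap);
    return 0;
}
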
@@ -964,6 +832,24 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, | |||
964 | return 0; | 832 | return 0; |
965 | } | 833 | } |
966 | 834 | ||
835 | static void update_super_roots(struct btrfs_root *root) | ||
836 | { | ||
837 | struct btrfs_root_item *root_item; | ||
838 | struct btrfs_super_block *super; | ||
839 | |||
840 | super = &root->fs_info->super_copy; | ||
841 | |||
842 | root_item = &root->fs_info->chunk_root->root_item; | ||
843 | super->chunk_root = root_item->bytenr; | ||
844 | super->chunk_root_generation = root_item->generation; | ||
845 | super->chunk_root_level = root_item->level; | ||
846 | |||
847 | root_item = &root->fs_info->tree_root->root_item; | ||
848 | super->root = root_item->bytenr; | ||
849 | super->generation = root_item->generation; | ||
850 | super->root_level = root_item->level; | ||
851 | } | ||
852 | |||
967 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 853 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
968 | struct btrfs_root *root) | 854 | struct btrfs_root *root) |
969 | { | 855 | { |
@@ -971,8 +857,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
971 | unsigned long timeout = 1; | 857 | unsigned long timeout = 1; |
972 | struct btrfs_transaction *cur_trans; | 858 | struct btrfs_transaction *cur_trans; |
973 | struct btrfs_transaction *prev_trans = NULL; | 859 | struct btrfs_transaction *prev_trans = NULL; |
974 | struct btrfs_root *chunk_root = root->fs_info->chunk_root; | ||
975 | struct list_head dirty_fs_roots; | ||
976 | struct extent_io_tree *pinned_copy; | 860 | struct extent_io_tree *pinned_copy; |
977 | DEFINE_WAIT(wait); | 861 | DEFINE_WAIT(wait); |
978 | int ret; | 862 | int ret; |
@@ -999,7 +883,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
999 | BUG_ON(ret); | 883 | BUG_ON(ret); |
1000 | 884 | ||
1001 | mutex_lock(&root->fs_info->trans_mutex); | 885 | mutex_lock(&root->fs_info->trans_mutex); |
1002 | INIT_LIST_HEAD(&dirty_fs_roots); | ||
1003 | if (cur_trans->in_commit) { | 886 | if (cur_trans->in_commit) { |
1004 | cur_trans->use_count++; | 887 | cur_trans->use_count++; |
1005 | mutex_unlock(&root->fs_info->trans_mutex); | 888 | mutex_unlock(&root->fs_info->trans_mutex); |
@@ -1105,41 +988,36 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1105 | * with the tree-log code. | 988 | * with the tree-log code. |
1106 | */ | 989 | */ |
1107 | mutex_lock(&root->fs_info->tree_log_mutex); | 990 | mutex_lock(&root->fs_info->tree_log_mutex); |
1108 | /* | ||
1109 | * keep tree reloc code from adding new reloc trees | ||
1110 | */ | ||
1111 | mutex_lock(&root->fs_info->tree_reloc_mutex); | ||
1112 | |||
1113 | 991 | ||
1114 | ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, | 992 | ret = commit_fs_roots(trans, root); |
1115 | &dirty_fs_roots); | ||
1116 | BUG_ON(ret); | 993 | BUG_ON(ret); |
1117 | 994 | ||
1118 | /* add_dirty_roots gets rid of all the tree log roots, it is now | 995 | /* commit_fs_roots gets rid of all the tree log roots, it is now |
1119 | * safe to free the root of tree log roots | 996 | * safe to free the root of tree log roots |
1120 | */ | 997 | */ |
1121 | btrfs_free_log_root_tree(trans, root->fs_info); | 998 | btrfs_free_log_root_tree(trans, root->fs_info); |
1122 | 999 | ||
1123 | ret = btrfs_commit_tree_roots(trans, root); | 1000 | ret = commit_cowonly_roots(trans, root); |
1124 | BUG_ON(ret); | 1001 | BUG_ON(ret); |
1125 | 1002 | ||
1126 | cur_trans = root->fs_info->running_transaction; | 1003 | cur_trans = root->fs_info->running_transaction; |
1127 | spin_lock(&root->fs_info->new_trans_lock); | 1004 | spin_lock(&root->fs_info->new_trans_lock); |
1128 | root->fs_info->running_transaction = NULL; | 1005 | root->fs_info->running_transaction = NULL; |
1129 | spin_unlock(&root->fs_info->new_trans_lock); | 1006 | spin_unlock(&root->fs_info->new_trans_lock); |
1130 | btrfs_set_super_generation(&root->fs_info->super_copy, | 1007 | |
1131 | cur_trans->transid); | 1008 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, |
1132 | btrfs_set_super_root(&root->fs_info->super_copy, | 1009 | root->fs_info->tree_root->node); |
1133 | root->fs_info->tree_root->node->start); | 1010 | free_extent_buffer(root->fs_info->tree_root->commit_root); |
1134 | btrfs_set_super_root_level(&root->fs_info->super_copy, | 1011 | root->fs_info->tree_root->commit_root = |
1135 | btrfs_header_level(root->fs_info->tree_root->node)); | 1012 | btrfs_root_node(root->fs_info->tree_root); |
1136 | 1013 | ||
1137 | btrfs_set_super_chunk_root(&root->fs_info->super_copy, | 1014 | btrfs_set_root_node(&root->fs_info->chunk_root->root_item, |
1138 | chunk_root->node->start); | 1015 | root->fs_info->chunk_root->node); |
1139 | btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, | 1016 | free_extent_buffer(root->fs_info->chunk_root->commit_root); |
1140 | btrfs_header_level(chunk_root->node)); | 1017 | root->fs_info->chunk_root->commit_root = |
1141 | btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, | 1018 | btrfs_root_node(root->fs_info->chunk_root); |
1142 | btrfs_header_generation(chunk_root->node)); | 1019 | |
1020 | update_super_roots(root); | ||
1143 | 1021 | ||
1144 | if (!root->fs_info->log_root_recovering) { | 1022 | if (!root->fs_info->log_root_recovering) { |
1145 | btrfs_set_super_log_root(&root->fs_info->super_copy, 0); | 1023 | btrfs_set_super_log_root(&root->fs_info->super_copy, 0); |
@@ -1153,7 +1031,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1153 | 1031 | ||
1154 | trans->transaction->blocked = 0; | 1032 | trans->transaction->blocked = 0; |
1155 | 1033 | ||
1156 | wake_up(&root->fs_info->transaction_throttle); | ||
1157 | wake_up(&root->fs_info->transaction_wait); | 1034 | wake_up(&root->fs_info->transaction_wait); |
1158 | 1035 | ||
1159 | mutex_unlock(&root->fs_info->trans_mutex); | 1036 | mutex_unlock(&root->fs_info->trans_mutex); |
@@ -1170,9 +1047,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1170 | btrfs_finish_extent_commit(trans, root, pinned_copy); | 1047 | btrfs_finish_extent_commit(trans, root, pinned_copy); |
1171 | kfree(pinned_copy); | 1048 | kfree(pinned_copy); |
1172 | 1049 | ||
1173 | btrfs_drop_dead_reloc_roots(root); | ||
1174 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
1175 | |||
1176 | /* do the directory inserts of any pending snapshot creations */ | 1050 | /* do the directory inserts of any pending snapshot creations */ |
1177 | finish_pending_snapshots(trans, root->fs_info); | 1051 | finish_pending_snapshots(trans, root->fs_info); |
1178 | 1052 | ||
@@ -1186,16 +1060,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1186 | put_transaction(cur_trans); | 1060 | put_transaction(cur_trans); |
1187 | put_transaction(cur_trans); | 1061 | put_transaction(cur_trans); |
1188 | 1062 | ||
1189 | list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); | ||
1190 | if (root->fs_info->closing) | ||
1191 | list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); | ||
1192 | |||
1193 | mutex_unlock(&root->fs_info->trans_mutex); | 1063 | mutex_unlock(&root->fs_info->trans_mutex); |
1194 | 1064 | ||
1195 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1065 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
1196 | |||
1197 | if (root->fs_info->closing) | ||
1198 | drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); | ||
1199 | return ret; | 1066 | return ret; |
1200 | } | 1067 | } |
1201 | 1068 | ||
@@ -1204,16 +1071,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1204 | */ | 1071 | */ |
1205 | int btrfs_clean_old_snapshots(struct btrfs_root *root) | 1072 | int btrfs_clean_old_snapshots(struct btrfs_root *root) |
1206 | { | 1073 | { |
1207 | struct list_head dirty_roots; | 1074 | LIST_HEAD(list); |
1208 | INIT_LIST_HEAD(&dirty_roots); | 1075 | struct btrfs_fs_info *fs_info = root->fs_info; |
1209 | again: | 1076 | |
1210 | mutex_lock(&root->fs_info->trans_mutex); | 1077 | mutex_lock(&fs_info->trans_mutex); |
1211 | list_splice_init(&root->fs_info->dead_roots, &dirty_roots); | 1078 | list_splice_init(&fs_info->dead_roots, &list); |
1212 | mutex_unlock(&root->fs_info->trans_mutex); | 1079 | mutex_unlock(&fs_info->trans_mutex); |
1213 | 1080 | ||
1214 | if (!list_empty(&dirty_roots)) { | 1081 | while (!list_empty(&list)) { |
1215 | drop_dirty_roots(root, &dirty_roots); | 1082 | root = list_entry(list.next, struct btrfs_root, root_list); |
1216 | goto again; | 1083 | list_del_init(&root->root_list); |
1084 | btrfs_drop_dead_root(root); | ||
1217 | } | 1085 | } |
1218 | return 0; | 1086 | return 0; |
1219 | } | 1087 | } |
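
btrfs_clean_old_snapshots() now splices fs_info->dead_roots onto a private list while holding trans_mutex and drops each root with the lock released, so the slow drop work never runs under the mutex. The same splice-and-drain move in plain pthreads, with invented names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct root {
    int id;
    struct root *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct root *dead_roots;          /* shared, lock-protected */

static void drop_root(struct root *r)    /* slow work, lock not held */
{
    printf("dropping root %d\n", r->id);
    free(r);
}

static void clean_old_snapshots(void)
{
    struct root *list;

    pthread_mutex_lock(&lock);
    list = dead_roots;                   /* splice the whole list out */
    dead_roots = NULL;
    pthread_mutex_unlock(&lock);

    while (list) {                       /* drain without the lock */
        struct root *r = list;

        list = r->next;
        drop_root(r);
    }
}

int main(void)
{
    for (int i = 0; i < 3; i++) {
        struct root *r = malloc(sizeof(*r));

        r->id = i;
        r->next = dead_roots;
        dead_roots = r;
    }
    clean_old_snapshots();
    return 0;
}
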
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 94f5bde2b58d..961c3ee5a2e1 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -62,12 +62,6 @@ struct btrfs_pending_snapshot { | |||
62 | struct list_head list; | 62 | struct list_head list; |
63 | }; | 63 | }; |
64 | 64 | ||
65 | struct btrfs_dirty_root { | ||
66 | struct list_head list; | ||
67 | struct btrfs_root *root; | ||
68 | struct btrfs_root *latest_root; | ||
69 | }; | ||
70 | |||
71 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, | 65 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, |
72 | struct inode *inode) | 66 | struct inode *inode) |
73 | { | 67 | { |
@@ -100,7 +94,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | |||
100 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 94 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, |
101 | struct btrfs_root *root); | 95 | struct btrfs_root *root); |
102 | 96 | ||
103 | int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest); | 97 | int btrfs_add_dead_root(struct btrfs_root *root); |
98 | int btrfs_drop_dead_root(struct btrfs_root *root); | ||
104 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); | 99 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); |
105 | int btrfs_clean_old_snapshots(struct btrfs_root *root); | 100 | int btrfs_clean_old_snapshots(struct btrfs_root *root); |
106 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 101 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
@@ -108,7 +103,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
108 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 103 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
109 | struct btrfs_root *root); | 104 | struct btrfs_root *root); |
110 | void btrfs_throttle(struct btrfs_root *root); | 105 | void btrfs_throttle(struct btrfs_root *root); |
111 | int btrfs_record_root_in_trans(struct btrfs_root *root); | 106 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
107 | struct btrfs_root *root); | ||
112 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 108 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, |
113 | struct extent_io_tree *dirty_pages); | 109 | struct extent_io_tree *dirty_pages); |
114 | #endif | 110 | #endif |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index db5e212e8445..c13922206d1b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -430,18 +430,16 @@ no_copy: | |||
430 | static noinline struct inode *read_one_inode(struct btrfs_root *root, | 430 | static noinline struct inode *read_one_inode(struct btrfs_root *root, |
431 | u64 objectid) | 431 | u64 objectid) |
432 | { | 432 | { |
433 | struct btrfs_key key; | ||
433 | struct inode *inode; | 434 | struct inode *inode; |
434 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
435 | if (inode->i_state & I_NEW) { | ||
436 | BTRFS_I(inode)->root = root; | ||
437 | BTRFS_I(inode)->location.objectid = objectid; | ||
438 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
439 | BTRFS_I(inode)->location.offset = 0; | ||
440 | btrfs_read_locked_inode(inode); | ||
441 | unlock_new_inode(inode); | ||
442 | 435 | ||
443 | } | 436 | key.objectid = objectid; |
444 | if (is_bad_inode(inode)) { | 437 | key.type = BTRFS_INODE_ITEM_KEY; |
438 | key.offset = 0; | ||
439 | inode = btrfs_iget(root->fs_info->sb, &key, root); | ||
440 | if (IS_ERR(inode)) { | ||
441 | inode = NULL; | ||
442 | } else if (is_bad_inode(inode)) { | ||
445 | iput(inode); | 443 | iput(inode); |
446 | inode = NULL; | 444 | inode = NULL; |
447 | } | 445 | } |
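
read_one_inode() now goes through btrfs_iget() with an explicit key and collapses both failure modes -- an error pointer from the lookup and a bad (unreadable) inode -- into a plain NULL for its callers. A sketch of that normalize-to-NULL convention; the kernel's ERR_PTR machinery is reduced here to a NULL return from the stand-in lookup:

#include <stdio.h>
#include <stdlib.h>

struct inode {
    unsigned long long objectid;
    int bad;                    /* set when the inode could not be read */
};

/* Stand-in for btrfs_iget(): NULL on lookup failure, otherwise an
 * inode that may still turn out to be unreadable. */
static struct inode *iget(unsigned long long objectid)
{
    struct inode *inode = calloc(1, sizeof(*inode));

    if (inode)
        inode->objectid = objectid;
    return inode;
}

/* Callers only ever see "inode or NULL", never an error pointer
 * and never a half-read inode. */
static struct inode *read_one_inode(unsigned long long objectid)
{
    struct inode *inode = iget(objectid);

    if (!inode)
        return NULL;            /* the lookup itself failed */
    if (inode->bad) {
        free(inode);            /* the kernel would iput() here */
        return NULL;
    }
    return inode;
}

int main(void)
{
    struct inode *inode = read_one_inode(256);

    printf("inode %s\n", inode ? "found" : "missing");
    free(inode);
    return 0;
}
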
@@ -541,6 +539,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
541 | 539 | ||
542 | if (found_type == BTRFS_FILE_EXTENT_REG || | 540 | if (found_type == BTRFS_FILE_EXTENT_REG || |
543 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 541 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
542 | u64 offset; | ||
544 | unsigned long dest_offset; | 543 | unsigned long dest_offset; |
545 | struct btrfs_key ins; | 544 | struct btrfs_key ins; |
546 | 545 | ||
@@ -555,6 +554,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
555 | ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); | 554 | ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); |
556 | ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); | 555 | ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); |
557 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 556 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
557 | offset = key->offset - btrfs_file_extent_offset(eb, item); | ||
558 | 558 | ||
559 | if (ins.objectid > 0) { | 559 | if (ins.objectid > 0) { |
560 | u64 csum_start; | 560 | u64 csum_start; |
@@ -569,19 +569,16 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
569 | if (ret == 0) { | 569 | if (ret == 0) { |
570 | ret = btrfs_inc_extent_ref(trans, root, | 570 | ret = btrfs_inc_extent_ref(trans, root, |
571 | ins.objectid, ins.offset, | 571 | ins.objectid, ins.offset, |
572 | path->nodes[0]->start, | 572 | 0, root->root_key.objectid, |
573 | root->root_key.objectid, | 573 | key->objectid, offset); |
574 | trans->transid, key->objectid); | ||
575 | } else { | 574 | } else { |
576 | /* | 575 | /* |
577 | * insert the extent pointer in the extent | 576 | * insert the extent pointer in the extent |
578 | * allocation tree | 577 | * allocation tree |
579 | */ | 578 | */ |
580 | ret = btrfs_alloc_logged_extent(trans, root, | 579 | ret = btrfs_alloc_logged_file_extent(trans, |
581 | path->nodes[0]->start, | 580 | root, root->root_key.objectid, |
582 | root->root_key.objectid, | 581 | key->objectid, offset, &ins); |
583 | trans->transid, key->objectid, | ||
584 | &ins); | ||
585 | BUG_ON(ret); | 582 | BUG_ON(ret); |
586 | } | 583 | } |
587 | btrfs_release_path(root, path); | 584 | btrfs_release_path(root, path); |
@@ -1706,9 +1703,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1706 | btrfs_wait_tree_block_writeback(next); | 1703 | btrfs_wait_tree_block_writeback(next); |
1707 | btrfs_tree_unlock(next); | 1704 | btrfs_tree_unlock(next); |
1708 | 1705 | ||
1709 | ret = btrfs_drop_leaf_ref(trans, root, next); | ||
1710 | BUG_ON(ret); | ||
1711 | |||
1712 | WARN_ON(root_owner != | 1706 | WARN_ON(root_owner != |
1713 | BTRFS_TREE_LOG_OBJECTID); | 1707 | BTRFS_TREE_LOG_OBJECTID); |
1714 | ret = btrfs_free_reserved_extent(root, | 1708 | ret = btrfs_free_reserved_extent(root, |
@@ -1753,10 +1747,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1753 | btrfs_wait_tree_block_writeback(next); | 1747 | btrfs_wait_tree_block_writeback(next); |
1754 | btrfs_tree_unlock(next); | 1748 | btrfs_tree_unlock(next); |
1755 | 1749 | ||
1756 | if (*level == 0) { | ||
1757 | ret = btrfs_drop_leaf_ref(trans, root, next); | ||
1758 | BUG_ON(ret); | ||
1759 | } | ||
1760 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | 1750 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); |
1761 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); | 1751 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); |
1762 | BUG_ON(ret); | 1752 | BUG_ON(ret); |
@@ -1811,12 +1801,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1811 | btrfs_wait_tree_block_writeback(next); | 1801 | btrfs_wait_tree_block_writeback(next); |
1812 | btrfs_tree_unlock(next); | 1802 | btrfs_tree_unlock(next); |
1813 | 1803 | ||
1814 | if (*level == 0) { | ||
1815 | ret = btrfs_drop_leaf_ref(trans, root, | ||
1816 | next); | ||
1817 | BUG_ON(ret); | ||
1818 | } | ||
1819 | |||
1820 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | 1804 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); |
1821 | ret = btrfs_free_reserved_extent(root, | 1805 | ret = btrfs_free_reserved_extent(root, |
1822 | path->nodes[*level]->start, | 1806 | path->nodes[*level]->start, |
@@ -1884,11 +1868,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1884 | btrfs_wait_tree_block_writeback(next); | 1868 | btrfs_wait_tree_block_writeback(next); |
1885 | btrfs_tree_unlock(next); | 1869 | btrfs_tree_unlock(next); |
1886 | 1870 | ||
1887 | if (orig_level == 0) { | ||
1888 | ret = btrfs_drop_leaf_ref(trans, log, | ||
1889 | next); | ||
1890 | BUG_ON(ret); | ||
1891 | } | ||
1892 | WARN_ON(log->root_key.objectid != | 1871 | WARN_ON(log->root_key.objectid != |
1893 | BTRFS_TREE_LOG_OBJECTID); | 1872 | BTRFS_TREE_LOG_OBJECTID); |
1894 | ret = btrfs_free_reserved_extent(log, next->start, | 1873 | ret = btrfs_free_reserved_extent(log, next->start, |
@@ -2027,9 +2006,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2027 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); | 2006 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); |
2028 | BUG_ON(ret); | 2007 | BUG_ON(ret); |
2029 | 2008 | ||
2030 | btrfs_set_root_bytenr(&log->root_item, log->node->start); | 2009 | btrfs_set_root_node(&log->root_item, log->node); |
2031 | btrfs_set_root_generation(&log->root_item, trans->transid); | ||
2032 | btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); | ||
2033 | 2010 | ||
2034 | root->log_batch = 0; | 2011 | root->log_batch = 0; |
2035 | root->log_transid++; | 2012 | root->log_transid++; |
@@ -2581,7 +2558,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2581 | ins_keys, ins_sizes, nr); | 2558 | ins_keys, ins_sizes, nr); |
2582 | BUG_ON(ret); | 2559 | BUG_ON(ret); |
2583 | 2560 | ||
2584 | for (i = 0; i < nr; i++) { | 2561 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { |
2585 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], | 2562 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], |
2586 | dst_path->slots[0]); | 2563 | dst_path->slots[0]); |
2587 | 2564 | ||
@@ -2617,36 +2594,31 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2617 | found_type = btrfs_file_extent_type(src, extent); | 2594 | found_type = btrfs_file_extent_type(src, extent); |
2618 | if (found_type == BTRFS_FILE_EXTENT_REG || | 2595 | if (found_type == BTRFS_FILE_EXTENT_REG || |
2619 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 2596 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
2620 | u64 ds = btrfs_file_extent_disk_bytenr(src, | 2597 | u64 ds, dl, cs, cl; |
2621 | extent); | 2598 | ds = btrfs_file_extent_disk_bytenr(src, |
2622 | u64 dl = btrfs_file_extent_disk_num_bytes(src, | 2599 | extent); |
2623 | extent); | 2600 | /* ds == 0 is a hole */ |
2624 | u64 cs = btrfs_file_extent_offset(src, extent); | 2601 | if (ds == 0) |
2625 | u64 cl = btrfs_file_extent_num_bytes(src, | 2602 | continue; |
2626 | extent); | 2603 | |
2604 | dl = btrfs_file_extent_disk_num_bytes(src, | ||
2605 | extent); | ||
2606 | cs = btrfs_file_extent_offset(src, extent); | ||
2607 | cl = btrfs_file_extent_num_bytes(src, | ||
2608 | extent); | ||
2627 | if (btrfs_file_extent_compression(src, | 2609 | if (btrfs_file_extent_compression(src, |
2628 | extent)) { | 2610 | extent)) { |
2629 | cs = 0; | 2611 | cs = 0; |
2630 | cl = dl; | 2612 | cl = dl; |
2631 | } | 2613 | } |
2632 | /* ds == 0 is a hole */ | 2614 | |
2633 | if (ds != 0) { | 2615 | ret = btrfs_lookup_csums_range( |
2634 | ret = btrfs_inc_extent_ref(trans, log, | 2616 | log->fs_info->csum_root, |
2635 | ds, dl, | 2617 | ds + cs, ds + cs + cl - 1, |
2636 | dst_path->nodes[0]->start, | 2618 | &ordered_sums); |
2637 | BTRFS_TREE_LOG_OBJECTID, | 2619 | BUG_ON(ret); |
2638 | trans->transid, | ||
2639 | ins_keys[i].objectid); | ||
2640 | BUG_ON(ret); | ||
2641 | ret = btrfs_lookup_csums_range( | ||
2642 | log->fs_info->csum_root, | ||
2643 | ds + cs, ds + cs + cl - 1, | ||
2644 | &ordered_sums); | ||
2645 | BUG_ON(ret); | ||
2646 | } | ||
2647 | } | 2620 | } |
2648 | } | 2621 | } |
2649 | dst_path->slots[0]++; | ||
2650 | } | 2622 | } |
2651 | 2623 | ||
2652 | btrfs_mark_buffer_dirty(dst_path->nodes[0]); | 2624 | btrfs_mark_buffer_dirty(dst_path->nodes[0]); |
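
The copy_items() hunk above moves dst_path->slots[0]++ into the for statement. That is not cosmetic: with the increment at the bottom of the body, the newly added continue for holes (ds == 0) would skip it, and every later item would be written against the wrong slot. The pitfall in miniature:

#include <stdio.h>

int main(void)
{
    int src[] = { 1, 0, 3 };    /* 0 plays the role of a hole */
    int slot;

    /* Buggy shape: increment at the end of the body, so "continue"
     * skips it and the slot stops advancing. */
    slot = 0;
    for (int i = 0; i < 3; i++) {
        if (src[i] == 0)
            continue;           /* oops: slot not advanced */
        printf("buggy: item %d -> slot %d\n", src[i], slot);
        slot++;
    }

    /* Fixed shape: the increment lives in the for statement, so it
     * runs on every iteration, continue or not. */
    slot = 0;
    for (int i = 0; i < 3; i++, slot++) {
        if (src[i] == 0)
            continue;
        printf("fixed: item %d -> slot %d\n", src[i], slot);
    }
    return 0;
}
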
@@ -3029,9 +3001,7 @@ again: | |||
3029 | BUG_ON(!wc.replay_dest); | 3001 | BUG_ON(!wc.replay_dest); |
3030 | 3002 | ||
3031 | wc.replay_dest->log_root = log; | 3003 | wc.replay_dest->log_root = log; |
3032 | mutex_lock(&fs_info->trans_mutex); | 3004 | btrfs_record_root_in_trans(trans, wc.replay_dest); |
3033 | btrfs_record_root_in_trans(wc.replay_dest); | ||
3034 | mutex_unlock(&fs_info->trans_mutex); | ||
3035 | ret = walk_log_tree(trans, log, &wc); | 3005 | ret = walk_log_tree(trans, log, &wc); |
3036 | BUG_ON(ret); | 3006 | BUG_ON(ret); |
3037 | 3007 | ||
@@ -3049,6 +3019,7 @@ again: | |||
3049 | key.offset = found_key.offset - 1; | 3019 | key.offset = found_key.offset - 1; |
3050 | wc.replay_dest->log_root = NULL; | 3020 | wc.replay_dest->log_root = NULL; |
3051 | free_extent_buffer(log->node); | 3021 | free_extent_buffer(log->node); |
3022 | free_extent_buffer(log->commit_root); | ||
3052 | kfree(log); | 3023 | kfree(log); |
3053 | 3024 | ||
3054 | if (found_key.offset == 0) | 3025 | if (found_key.offset == 0) |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6d35b0054ca..3ab80e9cd767 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -161,8 +161,10 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
161 | int again = 0; | 161 | int again = 0; |
162 | unsigned long num_run; | 162 | unsigned long num_run; |
163 | unsigned long num_sync_run; | 163 | unsigned long num_sync_run; |
164 | unsigned long batch_run = 0; | ||
164 | unsigned long limit; | 165 | unsigned long limit; |
165 | unsigned long last_waited = 0; | 166 | unsigned long last_waited = 0; |
167 | int force_reg = 0; | ||
166 | 168 | ||
167 | bdi = blk_get_backing_dev_info(device->bdev); | 169 | bdi = blk_get_backing_dev_info(device->bdev); |
168 | fs_info = device->dev_root->fs_info; | 170 | fs_info = device->dev_root->fs_info; |
@@ -176,19 +178,22 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
176 | 178 | ||
177 | loop: | 179 | loop: |
178 | spin_lock(&device->io_lock); | 180 | spin_lock(&device->io_lock); |
179 | num_run = 0; | ||
180 | 181 | ||
181 | loop_lock: | 182 | loop_lock: |
183 | num_run = 0; | ||
182 | 184 | ||
183 | /* take all the bios off the list at once and process them | 185 | /* take all the bios off the list at once and process them |
184 | * later on (without the lock held). But, remember the | 186 | * later on (without the lock held). But, remember the |
185 | * tail and other pointers so the bios can be properly reinserted | 187 | * tail and other pointers so the bios can be properly reinserted |
186 | * into the list if we hit congestion | 188 | * into the list if we hit congestion |
187 | */ | 189 | */ |
188 | if (device->pending_sync_bios.head) | 190 | if (!force_reg && device->pending_sync_bios.head) { |
189 | pending_bios = &device->pending_sync_bios; | 191 | pending_bios = &device->pending_sync_bios; |
190 | else | 192 | force_reg = 1; |
193 | } else { | ||
191 | pending_bios = &device->pending_bios; | 194 | pending_bios = &device->pending_bios; |
195 | force_reg = 0; | ||
196 | } | ||
192 | 197 | ||
193 | pending = pending_bios->head; | 198 | pending = pending_bios->head; |
194 | tail = pending_bios->tail; | 199 | tail = pending_bios->tail; |
@@ -228,10 +233,14 @@ loop_lock: | |||
228 | while (pending) { | 233 | while (pending) { |
229 | 234 | ||
230 | rmb(); | 235 | rmb(); |
231 | if (pending_bios != &device->pending_sync_bios && | 236 | /* we want to work on both lists, but do more bios on the |
232 | device->pending_sync_bios.head && | 237 | * sync list than the regular list |
233 | num_run > 16) { | 238 | */ |
234 | cond_resched(); | 239 | if ((num_run > 32 && |
240 | pending_bios != &device->pending_sync_bios && | ||
241 | device->pending_sync_bios.head) || | ||
242 | (num_run > 64 && pending_bios == &device->pending_sync_bios && | ||
243 | device->pending_bios.head)) { | ||
235 | spin_lock(&device->io_lock); | 244 | spin_lock(&device->io_lock); |
236 | requeue_list(pending_bios, pending, tail); | 245 | requeue_list(pending_bios, pending, tail); |
237 | goto loop_lock; | 246 | goto loop_lock; |
@@ -249,6 +258,8 @@ loop_lock: | |||
249 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 258 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
250 | submit_bio(cur->bi_rw, cur); | 259 | submit_bio(cur->bi_rw, cur); |
251 | num_run++; | 260 | num_run++; |
261 | batch_run++; | ||
262 | |||
252 | if (bio_sync(cur)) | 263 | if (bio_sync(cur)) |
253 | num_sync_run++; | 264 | num_sync_run++; |
254 | 265 | ||
@@ -265,7 +276,7 @@ loop_lock: | |||
265 | * is now congested. Back off and let other work structs | 276 | * is now congested. Back off and let other work structs |
266 | * run instead | 277 | * run instead |
267 | */ | 278 | */ |
268 | if (pending && bdi_write_congested(bdi) && num_run > 16 && | 279 | if (pending && bdi_write_congested(bdi) && batch_run > 32 && |
269 | fs_info->fs_devices->open_devices > 1) { | 280 | fs_info->fs_devices->open_devices > 1) { |
270 | struct io_context *ioc; | 281 | struct io_context *ioc; |
271 | 282 | ||
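
run_scheduled_bios() now alternates between the sync and regular bio queues instead of letting one starve the other: force_reg flips which list is taken each pass, and the batch thresholds (32 on the regular list, 64 on the sync list) bound how long one list may run while the other has work. A compact model of the alternation; the real code only switches when the other queue is non-empty, which this sketch simplifies away:

#include <stdio.h>

int main(void)
{
    int sync_left = 5, reg_left = 100;
    int force_reg = 0;

    while (sync_left || reg_left) {
        int *queue, budget;

        /* Prefer the sync queue, but not twice in a row. */
        if (!force_reg && sync_left) {
            queue = &sync_left;  budget = 64;  force_reg = 1;
        } else {
            queue = &reg_left;   budget = 32;  force_reg = 0;
        }

        while (*queue && budget--)
            (*queue)--;          /* "submit" one bio */

        printf("pass done: sync=%d regular=%d\n", sync_left, reg_left);
    }
    return 0;
}
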
@@ -366,6 +377,7 @@ static noinline int device_list_add(const char *path, | |||
366 | memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); | 377 | memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); |
367 | fs_devices->latest_devid = devid; | 378 | fs_devices->latest_devid = devid; |
368 | fs_devices->latest_trans = found_transid; | 379 | fs_devices->latest_trans = found_transid; |
380 | mutex_init(&fs_devices->device_list_mutex); | ||
369 | device = NULL; | 381 | device = NULL; |
370 | } else { | 382 | } else { |
371 | device = __find_device(&fs_devices->devices, devid, | 383 | device = __find_device(&fs_devices->devices, devid, |
@@ -392,7 +404,11 @@ static noinline int device_list_add(const char *path, | |||
392 | return -ENOMEM; | 404 | return -ENOMEM; |
393 | } | 405 | } |
394 | INIT_LIST_HEAD(&device->dev_alloc_list); | 406 | INIT_LIST_HEAD(&device->dev_alloc_list); |
407 | |||
408 | mutex_lock(&fs_devices->device_list_mutex); | ||
395 | list_add(&device->dev_list, &fs_devices->devices); | 409 | list_add(&device->dev_list, &fs_devices->devices); |
410 | mutex_unlock(&fs_devices->device_list_mutex); | ||
411 | |||
396 | device->fs_devices = fs_devices; | 412 | device->fs_devices = fs_devices; |
397 | fs_devices->num_devices++; | 413 | fs_devices->num_devices++; |
398 | } | 414 | } |
@@ -418,10 +434,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
418 | INIT_LIST_HEAD(&fs_devices->devices); | 434 | INIT_LIST_HEAD(&fs_devices->devices); |
419 | INIT_LIST_HEAD(&fs_devices->alloc_list); | 435 | INIT_LIST_HEAD(&fs_devices->alloc_list); |
420 | INIT_LIST_HEAD(&fs_devices->list); | 436 | INIT_LIST_HEAD(&fs_devices->list); |
437 | mutex_init(&fs_devices->device_list_mutex); | ||
421 | fs_devices->latest_devid = orig->latest_devid; | 438 | fs_devices->latest_devid = orig->latest_devid; |
422 | fs_devices->latest_trans = orig->latest_trans; | 439 | fs_devices->latest_trans = orig->latest_trans; |
423 | memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); | 440 | memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); |
424 | 441 | ||
442 | mutex_lock(&orig->device_list_mutex); | ||
425 | list_for_each_entry(orig_dev, &orig->devices, dev_list) { | 443 | list_for_each_entry(orig_dev, &orig->devices, dev_list) { |
426 | device = kzalloc(sizeof(*device), GFP_NOFS); | 444 | device = kzalloc(sizeof(*device), GFP_NOFS); |
427 | if (!device) | 445 | if (!device) |
@@ -443,8 +461,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
443 | device->fs_devices = fs_devices; | 461 | device->fs_devices = fs_devices; |
444 | fs_devices->num_devices++; | 462 | fs_devices->num_devices++; |
445 | } | 463 | } |
464 | mutex_unlock(&orig->device_list_mutex); | ||
446 | return fs_devices; | 465 | return fs_devices; |
447 | error: | 466 | error: |
467 | mutex_unlock(&orig->device_list_mutex); | ||
448 | free_fs_devices(fs_devices); | 468 | free_fs_devices(fs_devices); |
449 | return ERR_PTR(-ENOMEM); | 469 | return ERR_PTR(-ENOMEM); |
450 | } | 470 | } |
@@ -455,6 +475,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) | |||
455 | 475 | ||
456 | mutex_lock(&uuid_mutex); | 476 | mutex_lock(&uuid_mutex); |
457 | again: | 477 | again: |
478 | mutex_lock(&fs_devices->device_list_mutex); | ||
458 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | 479 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
459 | if (device->in_fs_metadata) | 480 | if (device->in_fs_metadata) |
460 | continue; | 481 | continue; |
@@ -474,6 +495,7 @@ again: | |||
474 | kfree(device->name); | 495 | kfree(device->name); |
475 | kfree(device); | 496 | kfree(device); |
476 | } | 497 | } |
498 | mutex_unlock(&fs_devices->device_list_mutex); | ||
477 | 499 | ||
478 | if (fs_devices->seed) { | 500 | if (fs_devices->seed) { |
479 | fs_devices = fs_devices->seed; | 501 | fs_devices = fs_devices->seed; |
@@ -594,6 +616,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
594 | device->in_fs_metadata = 0; | 616 | device->in_fs_metadata = 0; |
595 | device->mode = flags; | 617 | device->mode = flags; |
596 | 618 | ||
619 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | ||
620 | fs_devices->rotating = 1; | ||
621 | |||
597 | fs_devices->open_devices++; | 622 | fs_devices->open_devices++; |
598 | if (device->writeable) { | 623 | if (device->writeable) { |
599 | fs_devices->rw_devices++; | 624 | fs_devices->rw_devices++; |
@@ -1121,12 +1146,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1121 | 1146 | ||
1122 | device = NULL; | 1147 | device = NULL; |
1123 | devices = &root->fs_info->fs_devices->devices; | 1148 | devices = &root->fs_info->fs_devices->devices; |
1149 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1124 | list_for_each_entry(tmp, devices, dev_list) { | 1150 | list_for_each_entry(tmp, devices, dev_list) { |
1125 | if (tmp->in_fs_metadata && !tmp->bdev) { | 1151 | if (tmp->in_fs_metadata && !tmp->bdev) { |
1126 | device = tmp; | 1152 | device = tmp; |
1127 | break; | 1153 | break; |
1128 | } | 1154 | } |
1129 | } | 1155 | } |
1156 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1130 | bdev = NULL; | 1157 | bdev = NULL; |
1131 | bh = NULL; | 1158 | bh = NULL; |
1132 | disk_super = NULL; | 1159 | disk_super = NULL; |
@@ -1181,7 +1208,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1181 | goto error_brelse; | 1208 | goto error_brelse; |
1182 | 1209 | ||
1183 | device->in_fs_metadata = 0; | 1210 | device->in_fs_metadata = 0; |
1211 | |||
1212 | /* | ||
1213 | * the device list mutex makes sure that we don't change | ||
1214 | * the device list while someone else is writing out all | ||
1215 | * the device supers. | ||
1216 | */ | ||
1217 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1184 | list_del_init(&device->dev_list); | 1218 | list_del_init(&device->dev_list); |
1219 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1220 | |||
1185 | device->fs_devices->num_devices--; | 1221 | device->fs_devices->num_devices--; |
1186 | 1222 | ||
1187 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1223 | next_device = list_entry(root->fs_info->fs_devices->devices.next, |
@@ -1275,6 +1311,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, | |||
1275 | seed_devices->opened = 1; | 1311 | seed_devices->opened = 1; |
1276 | INIT_LIST_HEAD(&seed_devices->devices); | 1312 | INIT_LIST_HEAD(&seed_devices->devices); |
1277 | INIT_LIST_HEAD(&seed_devices->alloc_list); | 1313 | INIT_LIST_HEAD(&seed_devices->alloc_list); |
1314 | mutex_init(&seed_devices->device_list_mutex); | ||
1278 | list_splice_init(&fs_devices->devices, &seed_devices->devices); | 1315 | list_splice_init(&fs_devices->devices, &seed_devices->devices); |
1279 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); | 1316 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); |
1280 | list_for_each_entry(device, &seed_devices->devices, dev_list) { | 1317 | list_for_each_entry(device, &seed_devices->devices, dev_list) { |
@@ -1400,6 +1437,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1400 | mutex_lock(&root->fs_info->volume_mutex); | 1437 | mutex_lock(&root->fs_info->volume_mutex); |
1401 | 1438 | ||
1402 | devices = &root->fs_info->fs_devices->devices; | 1439 | devices = &root->fs_info->fs_devices->devices; |
1440 | /* | ||
1441 | * we have the volume lock, so we don't need the extra | ||
1442 | * device list mutex while reading the list here. | ||
1443 | */ | ||
1403 | list_for_each_entry(device, devices, dev_list) { | 1444 | list_for_each_entry(device, devices, dev_list) { |
1404 | if (device->bdev == bdev) { | 1445 | if (device->bdev == bdev) { |
1405 | ret = -EEXIST; | 1446 | ret = -EEXIST; |
@@ -1454,6 +1495,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1454 | } | 1495 | } |
1455 | 1496 | ||
1456 | device->fs_devices = root->fs_info->fs_devices; | 1497 | device->fs_devices = root->fs_info->fs_devices; |
1498 | |||
1499 | /* | ||
1500 | * we don't want write_supers to jump in here with our device | ||
1501 | * half setup | ||
1502 | */ | ||
1503 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1457 | list_add(&device->dev_list, &root->fs_info->fs_devices->devices); | 1504 | list_add(&device->dev_list, &root->fs_info->fs_devices->devices); |
1458 | list_add(&device->dev_alloc_list, | 1505 | list_add(&device->dev_alloc_list, |
1459 | &root->fs_info->fs_devices->alloc_list); | 1506 | &root->fs_info->fs_devices->alloc_list); |
@@ -1462,6 +1509,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1462 | root->fs_info->fs_devices->rw_devices++; | 1509 | root->fs_info->fs_devices->rw_devices++; |
1463 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 1510 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
1464 | 1511 | ||
1512 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | ||
1513 | root->fs_info->fs_devices->rotating = 1; | ||
1514 | |||
1465 | total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); | 1515 | total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); |
1466 | btrfs_set_super_total_bytes(&root->fs_info->super_copy, | 1516 | btrfs_set_super_total_bytes(&root->fs_info->super_copy, |
1467 | total_bytes + device->total_bytes); | 1517 | total_bytes + device->total_bytes); |
@@ -1469,6 +1519,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1469 | total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); | 1519 | total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); |
1470 | btrfs_set_super_num_devices(&root->fs_info->super_copy, | 1520 | btrfs_set_super_num_devices(&root->fs_info->super_copy, |
1471 | total_bytes + 1); | 1521 | total_bytes + 1); |
1522 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1472 | 1523 | ||
1473 | if (seeding_dev) { | 1524 | if (seeding_dev) { |
1474 | ret = init_first_rw_device(trans, root, device); | 1525 | ret = init_first_rw_device(trans, root, device); |
@@ -1671,8 +1722,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1671 | int ret; | 1722 | int ret; |
1672 | int i; | 1723 | int i; |
1673 | 1724 | ||
1674 | printk(KERN_INFO "btrfs relocating chunk %llu\n", | ||
1675 | (unsigned long long)chunk_offset); | ||
1676 | root = root->fs_info->chunk_root; | 1725 | root = root->fs_info->chunk_root; |
1677 | extent_root = root->fs_info->extent_root; | 1726 | extent_root = root->fs_info->extent_root; |
1678 | em_tree = &root->fs_info->mapping_tree.map_tree; | 1727 | em_tree = &root->fs_info->mapping_tree.map_tree; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5c3ff6d02fd7..5139a833f721 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -96,7 +96,12 @@ struct btrfs_fs_devices { | |||
96 | u64 rw_devices; | 96 | u64 rw_devices; |
97 | u64 total_rw_bytes; | 97 | u64 total_rw_bytes; |
98 | struct block_device *latest_bdev; | 98 | struct block_device *latest_bdev; |
99 | /* all of the devices in the FS */ | 99 | |
100 | /* all of the devices in the FS, protected by a mutex | ||
101 | * so we can safely walk it to write out the supers without | ||
102 | * worrying about add/remove by the multi-device code | ||
103 | */ | ||
104 | struct mutex device_list_mutex; | ||
100 | struct list_head devices; | 105 | struct list_head devices; |
101 | 106 | ||
102 | /* devices not currently being allocated */ | 107 | /* devices not currently being allocated */ |
@@ -107,6 +112,11 @@ struct btrfs_fs_devices { | |||
107 | int seeding; | 112 | int seeding; |
108 | 113 | ||
109 | int opened; | 114 | int opened; |
115 | |||
116 | /* set when we find or add a device that doesn't have the | ||
117 | * nonrot flag set | ||
118 | */ | ||
119 | int rotating; | ||
110 | }; | 120 | }; |
111 | 121 | ||
112 | struct btrfs_bio_stripe { | 122 | struct btrfs_bio_stripe { |
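
The new device_list_mutex closes the race the volumes.h comment above describes: device add/remove used to mutate fs_devices->devices while the superblock writer walked the same list. The invariant -- mutation and traversal always take the same lock -- in plain pthreads, with a fixed array in place of the kernel list and all names invented:

#include <pthread.h>
#include <stdio.h>

#define MAX_DEVS 8

static pthread_mutex_t device_list_mutex = PTHREAD_MUTEX_INITIALIZER;
static int devices[MAX_DEVS];
static int num_devices;

/* Mutation and traversal take the same mutex, so write_supers()
 * can never observe a half-updated list. */
static void add_device(int id)
{
    pthread_mutex_lock(&device_list_mutex);
    devices[num_devices++] = id;
    pthread_mutex_unlock(&device_list_mutex);
}

static void write_supers(void)
{
    pthread_mutex_lock(&device_list_mutex);
    for (int i = 0; i < num_devices; i++)
        printf("writing super to device %d\n", devices[i]);
    pthread_mutex_unlock(&device_list_mutex);
}

int main(void)
{
    add_device(1);
    add_device(2);
    write_supers();
    return 0;
}
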
diff --git a/fs/buffer.c b/fs/buffer.c index 49106127a4aa..a3ef091a45bd 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1085,12 +1085,12 @@ static struct buffer_head * | |||
1085 | __getblk_slow(struct block_device *bdev, sector_t block, int size) | 1085 | __getblk_slow(struct block_device *bdev, sector_t block, int size) |
1086 | { | 1086 | { |
1087 | /* Size must be multiple of hard sectorsize */ | 1087 | /* Size must be multiple of hard sectorsize */ |
1088 | if (unlikely(size & (bdev_hardsect_size(bdev)-1) || | 1088 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || |
1089 | (size < 512 || size > PAGE_SIZE))) { | 1089 | (size < 512 || size > PAGE_SIZE))) { |
1090 | printk(KERN_ERR "getblk(): invalid block size %d requested\n", | 1090 | printk(KERN_ERR "getblk(): invalid block size %d requested\n", |
1091 | size); | 1091 | size); |
1092 | printk(KERN_ERR "hardsect size: %d\n", | 1092 | printk(KERN_ERR "logical block size: %d\n", |
1093 | bdev_hardsect_size(bdev)); | 1093 | bdev_logical_block_size(bdev)); |
1094 | 1094 | ||
1095 | dump_stack(); | 1095 | dump_stack(); |
1096 | return NULL; | 1096 | return NULL; |
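
Besides the rename, the message in __getblk_slow() now says what is actually checked: the buffer size must be an aligned multiple of the device's logical block size and fall within [512, PAGE_SIZE]. The predicate extracted on its own, with values hardwired for illustration; the bitmask test relies on block sizes being powers of two, as they are for real devices:

#include <stdio.h>

#define PAGE_SIZE 4096

/* Mirrors the test in __getblk_slow(): size must be an aligned
 * multiple of the logical block size and lie within [512, PAGE_SIZE]. */
static int blocksize_ok(int size, int logical_block_size)
{
    if (size & (logical_block_size - 1))
        return 0;               /* not an aligned multiple */
    if (size < 512 || size > PAGE_SIZE)
        return 0;
    return 1;
}

int main(void)
{
    printf("1024 on a 512-byte device: %d\n", blocksize_ok(1024, 512));
    printf(" 768 on a 512-byte device: %d\n", blocksize_ok(768, 512));
    printf(" 256 on a 256-byte device: %d\n", blocksize_ok(256, 256));
    return 0;
}
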
@@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
2935 | BUG_ON(!buffer_locked(bh)); | 2935 | BUG_ON(!buffer_locked(bh)); |
2936 | BUG_ON(!buffer_mapped(bh)); | 2936 | BUG_ON(!buffer_mapped(bh)); |
2937 | BUG_ON(!bh->b_end_io); | 2937 | BUG_ON(!bh->b_end_io); |
2938 | BUG_ON(buffer_delay(bh)); | ||
2939 | BUG_ON(buffer_unwritten(bh)); | ||
2938 | 2940 | ||
2939 | /* | 2941 | /* |
2940 | * Mask in barrier bit for a write (could be either a WRITE or a | 2942 | * Mask in barrier bit for a write (could be either a WRITE or a |
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 1e962348d111..431accd475a7 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c | |||
@@ -354,7 +354,9 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache) | |||
354 | /* make sure all pages pinned by operations on behalf of the netfs are | 354 | /* make sure all pages pinned by operations on behalf of the netfs are |
355 | * written to disc */ | 355 | * written to disc */ |
356 | cachefiles_begin_secure(cache, &saved_cred); | 356 | cachefiles_begin_secure(cache, &saved_cred); |
357 | ret = fsync_super(cache->mnt->mnt_sb); | 357 | down_read(&cache->mnt->mnt_sb->s_umount); |
358 | ret = sync_filesystem(cache->mnt->mnt_sb); | ||
359 | up_read(&cache->mnt->mnt_sb->s_umount); | ||
358 | cachefiles_end_secure(cache, saved_cred); | 360 | cachefiles_end_secure(cache, saved_cred); |
359 | 361 | ||
360 | if (ret == -EIO) | 362 | if (ret == -EIO) |
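
The cachefiles change replaces fsync_super() with sync_filesystem() bracketed by a read hold of s_umount, so the superblock cannot be unmounted mid-sync while concurrent readers still proceed. The locking shape with a pthread rwlock; the names are invented for the sketch:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t s_umount = PTHREAD_RWLOCK_INITIALIZER;

static void sync_filesystem_toy(void)
{
    printf("syncing the filesystem\n");
}

/* Syncs take the read side and may run concurrently; unmount takes
 * the write side and therefore waits for every in-flight sync. */
static void sync_cache(void)
{
    pthread_rwlock_rdlock(&s_umount);
    sync_filesystem_toy();
    pthread_rwlock_unlock(&s_umount);
}

static void unmount_fs(void)
{
    pthread_rwlock_wrlock(&s_umount);
    printf("unmounting: no sync can be running now\n");
    pthread_rwlock_unlock(&s_umount);
}

int main(void)
{
    sync_cache();
    unmount_fs();
    return 0;
}
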
diff --git a/fs/char_dev.c b/fs/char_dev.c index 38f71222a552..b7c9d5187a75 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -375,7 +375,6 @@ static int chrdev_open(struct inode *inode, struct file *filp) | |||
375 | p = inode->i_cdev; | 375 | p = inode->i_cdev; |
376 | if (!p) { | 376 | if (!p) { |
377 | inode->i_cdev = p = new; | 377 | inode->i_cdev = p = new; |
378 | inode->i_cindex = idx; | ||
379 | list_add(&inode->i_devices, &p->list); | 378 | list_add(&inode->i_devices, &p->list); |
380 | new = NULL; | 379 | new = NULL; |
381 | } else if (!cdev_get(p)) | 380 | } else if (!cdev_get(p)) |
@@ -405,6 +404,18 @@ static int chrdev_open(struct inode *inode, struct file *filp) | |||
405 | return ret; | 404 | return ret; |
406 | } | 405 | } |
407 | 406 | ||
407 | int cdev_index(struct inode *inode) | ||
408 | { | ||
409 | int idx; | ||
410 | struct kobject *kobj; | ||
411 | |||
412 | kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); | ||
413 | if (!kobj) | ||
414 | return -1; | ||
415 | kobject_put(kobj); | ||
416 | return idx; | ||
417 | } | ||
418 | |||
408 | void cd_forget(struct inode *inode) | 419 | void cd_forget(struct inode *inode) |
409 | { | 420 | { |
410 | spin_lock(&cdev_lock); | 421 | spin_lock(&cdev_lock); |
@@ -557,6 +568,7 @@ EXPORT_SYMBOL(cdev_init); | |||
557 | EXPORT_SYMBOL(cdev_alloc); | 568 | EXPORT_SYMBOL(cdev_alloc); |
558 | EXPORT_SYMBOL(cdev_del); | 569 | EXPORT_SYMBOL(cdev_del); |
559 | EXPORT_SYMBOL(cdev_add); | 570 | EXPORT_SYMBOL(cdev_add); |
571 | EXPORT_SYMBOL(cdev_index); | ||
560 | EXPORT_SYMBOL(register_chrdev); | 572 | EXPORT_SYMBOL(register_chrdev); |
561 | EXPORT_SYMBOL(unregister_chrdev); | 573 | EXPORT_SYMBOL(unregister_chrdev); |
562 | EXPORT_SYMBOL(directly_mappable_cdev_bdi); | 574 | EXPORT_SYMBOL(directly_mappable_cdev_bdi); |
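With inode->i_cindex removed, the index of a character device within its registered range is no longer cached on the inode; the new exported cdev_index() recomputes it from the cdev map on demand, taking and immediately dropping a kobject reference. A hypothetical driver-side caller, to show the intended use:

	static int sample_open(struct inode *inode, struct file *filp)
	{
		int idx = cdev_index(inode);

		if (idx < 0)
			return -ENODEV;	/* no cdev bound to this dev_t */
		/* use idx wherever inode->i_cindex used to be read */
		return 0;
	}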
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f20c4069c220..b48689839428 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -1,3 +1,12 @@ | |||
1 | Version 1.59 | ||
2 | ------------ | ||
3 | Client uses server inode numbers (which are persistent) rather than | ||
4 | client-generated ones by default (the "serverino" mount option is | ||
5 | now on by default when the server supports it). Add forceuid and | ||
6 | forcegid mount options, so that when unix extensions are negotiated, | ||
7 | specifying which uid mounted the share no longer automatically | ||
8 | overrides the uids the server reports. | ||
9 | |||

1 | Version 1.58 | 10 | Version 1.58 |
2 | ------------ | 11 | ------------ |
3 | Guard against buffer overruns in various UCS-2 to UTF-8 string conversions | 12 | Guard against buffer overruns in various UCS-2 to UTF-8 string conversions |
@@ -10,6 +19,8 @@ we converted from). Fix endianness of the vcnum field used during | |||
10 | session setup to distinguish multiple mounts to same server from different | 19 | session setup to distinguish multiple mounts to same server from different |
11 | userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental | 20 | userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental |
12 | flag to be set to 2, and mount must enable krb5 to turn on extended security). | 21 | flag to be set to 2, and mount must enable krb5 to turn on extended security). |
22 | Performance of file create to Samba improved (posix create on lookup | ||
23 | removes 1 of 2 network requests sent on file create) | ||
13 | 24 | ||
14 | Version 1.57 | 25 | Version 1.57 |
15 | ------------ | 26 | ------------ |
diff --git a/fs/cifs/README b/fs/cifs/README index db208ddb9899..ad92921dbde4 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -262,7 +262,8 @@ A partial list of the supported mount options follows: | |||
262 | mount. | 262 | mount. |
263 | domain Set the SMB/CIFS workgroup name prepended to the | 263 | domain Set the SMB/CIFS workgroup name prepended to the |
264 | username during CIFS session establishment | 264 | username during CIFS session establishment |
265 | uid Set the default uid for inodes. For mounts to servers | 265 | forceuid Set the default uid for inodes based on the uid |
266 | passed in. For mounts to servers | ||
266 | which do support the CIFS Unix extensions, such as a | 267 | which do support the CIFS Unix extensions, such as a |
267 | properly configured Samba server, the server provides | 268 | properly configured Samba server, the server provides |
268 | the uid, gid and mode so this parameter should not be | 269 | the uid, gid and mode so this parameter should not be |
@@ -292,6 +293,12 @@ A partial list of the supported mount options follows: | |||
292 | the client. Note that the mount.cifs helper must be | 293 | the client. Note that the mount.cifs helper must be |
293 | at version 1.10 or higher to support specifying the uid | 294 | at version 1.10 or higher to support specifying the uid |
294 | (or gid) in non-numeric form. | 295 | (or gid) in non-numeric form. |
296 | forcegid (similar to above but for the groupid instead of uid) | ||
297 | uid Set the default uid for inodes, and indicate to the | ||
298 | cifs kernel driver which local user mounted. If the server | ||
299 | supports the unix extensions the default uid is | ||
300 | not used to fill in the owner fields of inodes (files) | ||
301 | unless the "forceuid" parameter is specified. | ||
295 | gid Set the default gid for inodes (similar to above). | 302 | gid Set the default gid for inodes (similar to above). |
296 | file_mode If CIFS Unix extensions are not supported by the server | 303 | file_mode If CIFS Unix extensions are not supported by the server |
297 | this overrides the default mode for file inodes. | 304 | this overrides the default mode for file inodes. |
@@ -388,8 +395,13 @@ A partial list of the supported mount options follows: | |||
388 | or the CIFS Unix Extensions equivalent and for those | 395 | or the CIFS Unix Extensions equivalent and for those |
389 | this mount option will have no effect. Exporting cifs mounts | 396 | this mount option will have no effect. Exporting cifs mounts |
390 | under nfsd requires this mount option on the cifs mount. | 397 | under nfsd requires this mount option on the cifs mount. |
398 | This is now the default if the server supports the | ||
399 | required network operation. | ||
391 | noserverino Client generates inode numbers (rather than using the actual one | 400 | noserverino Client generates inode numbers (rather than using the actual one |
392 | from the server) by default. | 401 | from the server). These inode numbers will vary after |
402 | unmount or reboot, which can confuse some applications; | ||
403 | note, however, that not all server filesystems support | ||
404 | unique inode numbers. | ||
393 | setuids If the CIFS Unix extensions are negotiated with the server | 405 | setuids If the CIFS Unix extensions are negotiated with the server |
394 | the client will attempt to set the effective uid and gid of | 406 | the client will attempt to set the effective uid and gid of |
395 | the local process on newly created files, directories, and | 407 | the local process on newly created files, directories, and |
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 83d62759c7c7..3bb11be8b6a8 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c | |||
@@ -275,7 +275,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, | |||
275 | case -EBUSY: | 275 | case -EBUSY: |
276 | /* someone else made a mount here whilst we were busy */ | 276 | /* someone else made a mount here whilst we were busy */ |
277 | while (d_mountpoint(nd->path.dentry) && | 277 | while (d_mountpoint(nd->path.dentry) && |
278 | follow_down(&nd->path.mnt, &nd->path.dentry)) | 278 | follow_down(&nd->path)) |
279 | ; | 279 | ; |
280 | err = 0; | 280 | err = 0; |
281 | default: | 281 | default: |
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 67bf93a40d2e..4a4581cb2b5e 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/string.h> | 23 | #include <linux/string.h> |
24 | #include <keys/user-type.h> | 24 | #include <keys/user-type.h> |
25 | #include <linux/key-type.h> | 25 | #include <linux/key-type.h> |
26 | #include <linux/inet.h> | ||
26 | #include "cifsglob.h" | 27 | #include "cifsglob.h" |
27 | #include "cifs_spnego.h" | 28 | #include "cifs_spnego.h" |
28 | #include "cifs_debug.h" | 29 | #include "cifs_debug.h" |
@@ -73,9 +74,6 @@ struct key_type cifs_spnego_key_type = { | |||
73 | * strlen(";sec=ntlmsspi") */ | 74 | * strlen(";sec=ntlmsspi") */ |
74 | #define MAX_MECH_STR_LEN 13 | 75 | #define MAX_MECH_STR_LEN 13 |
75 | 76 | ||
76 | /* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/128 */ | ||
77 | #define MAX_IPV6_ADDR_LEN 43 | ||
78 | |||
79 | /* strlen of "host=" */ | 77 | /* strlen of "host=" */ |
80 | #define HOST_KEY_LEN 5 | 78 | #define HOST_KEY_LEN 5 |
81 | 79 | ||
@@ -102,7 +100,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
102 | host=hostname sec=mechanism uid=0xFF user=username */ | 100 | host=hostname sec=mechanism uid=0xFF user=username */ |
103 | desc_len = MAX_VER_STR_LEN + | 101 | desc_len = MAX_VER_STR_LEN + |
104 | HOST_KEY_LEN + strlen(hostname) + | 102 | HOST_KEY_LEN + strlen(hostname) + |
105 | IP_KEY_LEN + MAX_IPV6_ADDR_LEN + | 103 | IP_KEY_LEN + INET6_ADDRSTRLEN + |
106 | MAX_MECH_STR_LEN + | 104 | MAX_MECH_STR_LEN + |
107 | UID_KEY_LEN + (sizeof(uid_t) * 2) + | 105 | UID_KEY_LEN + (sizeof(uid_t) * 2) + |
108 | USER_KEY_LEN + strlen(sesInfo->userName) + 1; | 106 | USER_KEY_LEN + strlen(sesInfo->userName) + 1; |
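The private MAX_IPV6_ADDR_LEN (43, per the removed comment) gives way to INET6_ADDRSTRLEN from the newly included <linux/inet.h>, which already budgets for the worst-case textual IPv6 address. The key-description sizing after this hunk, restated for reference:

	desc_len = MAX_VER_STR_LEN +
		   HOST_KEY_LEN + strlen(hostname) +
		   IP_KEY_LEN + INET6_ADDRSTRLEN +	/* was MAX_IPV6_ADDR_LEN */
		   MAX_MECH_STR_LEN +
		   UID_KEY_LEN + (sizeof(uid_t) * 2) +
		   USER_KEY_LEN + strlen(sesInfo->userName) + 1;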
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 57ecdc83c26f..1403b5d86a73 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -552,130 +552,138 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, | |||
552 | return rc; | 552 | return rc; |
553 | } | 553 | } |
554 | 554 | ||
555 | 555 | static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, | |
556 | /* Retrieve an ACL from the server */ | 556 | __u16 fid, u32 *pacllen) |
557 | static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, | ||
558 | const char *path, const __u16 *pfid) | ||
559 | { | 557 | { |
560 | struct cifsFileInfo *open_file = NULL; | ||
561 | bool unlock_file = false; | ||
562 | int xid; | ||
563 | int rc = -EIO; | ||
564 | __u16 fid; | ||
565 | struct super_block *sb; | ||
566 | struct cifs_sb_info *cifs_sb; | ||
567 | struct cifs_ntsd *pntsd = NULL; | 558 | struct cifs_ntsd *pntsd = NULL; |
559 | int xid, rc; | ||
560 | |||
561 | xid = GetXid(); | ||
562 | rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); | ||
563 | FreeXid(xid); | ||
568 | 564 | ||
569 | cFYI(1, ("get mode from ACL for %s", path)); | ||
570 | 565 | ||
571 | if (inode == NULL) | 566 | cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); |
572 | return NULL; | 567 | return pntsd; |
568 | } | ||
569 | |||
570 | static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, | ||
571 | const char *path, u32 *pacllen) | ||
572 | { | ||
573 | struct cifs_ntsd *pntsd = NULL; | ||
574 | int oplock = 0; | ||
575 | int xid, rc; | ||
576 | __u16 fid; | ||
573 | 577 | ||
574 | xid = GetXid(); | 578 | xid = GetXid(); |
575 | if (pfid == NULL) | ||
576 | open_file = find_readable_file(CIFS_I(inode)); | ||
577 | else | ||
578 | fid = *pfid; | ||
579 | 579 | ||
580 | sb = inode->i_sb; | 580 | rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0, |
581 | if (sb == NULL) { | 581 | &fid, &oplock, NULL, cifs_sb->local_nls, |
582 | FreeXid(xid); | 582 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
583 | return NULL; | 583 | if (rc) { |
584 | } | 584 | cERROR(1, ("Unable to open file to get ACL")); |
585 | cifs_sb = CIFS_SB(sb); | 585 | goto out; |
586 | |||
587 | if (open_file) { | ||
588 | unlock_file = true; | ||
589 | fid = open_file->netfid; | ||
590 | } else if (pfid == NULL) { | ||
591 | int oplock = 0; | ||
592 | /* open file */ | ||
593 | rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, | ||
594 | READ_CONTROL, 0, &fid, &oplock, NULL, | ||
595 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
596 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
597 | if (rc != 0) { | ||
598 | cERROR(1, ("Unable to open file to get ACL")); | ||
599 | FreeXid(xid); | ||
600 | return NULL; | ||
601 | } | ||
602 | } | 586 | } |
603 | 587 | ||
604 | rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); | 588 | rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); |
605 | cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); | 589 | cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); |
606 | if (unlock_file == true) /* find_readable_file increments ref count */ | ||
607 | atomic_dec(&open_file->wrtPending); | ||
608 | else if (pfid == NULL) /* if opened above we have to close the handle */ | ||
609 | CIFSSMBClose(xid, cifs_sb->tcon, fid); | ||
610 | /* else handle was passed in by caller */ | ||
611 | 590 | ||
591 | CIFSSMBClose(xid, cifs_sb->tcon, fid); | ||
592 | out: | ||
612 | FreeXid(xid); | 593 | FreeXid(xid); |
613 | return pntsd; | 594 | return pntsd; |
614 | } | 595 | } |
615 | 596 | ||
616 | /* Set an ACL on the server */ | 597 | /* Retrieve an ACL from the server */ |
617 | static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | 598 | static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, |
618 | struct inode *inode, const char *path) | 599 | struct inode *inode, const char *path, |
600 | u32 *pacllen) | ||
619 | { | 601 | { |
620 | struct cifsFileInfo *open_file; | 602 | struct cifs_ntsd *pntsd = NULL; |
621 | bool unlock_file = false; | 603 | struct cifsFileInfo *open_file = NULL; |
622 | int xid; | ||
623 | int rc = -EIO; | ||
624 | __u16 fid; | ||
625 | struct super_block *sb; | ||
626 | struct cifs_sb_info *cifs_sb; | ||
627 | 604 | ||
628 | cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); | 605 | if (inode) |
606 | open_file = find_readable_file(CIFS_I(inode)); | ||
607 | if (!open_file) | ||
608 | return get_cifs_acl_by_path(cifs_sb, path, pacllen); | ||
629 | 609 | ||
630 | if (!inode) | 610 | pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); |
631 | return rc; | 611 | atomic_dec(&open_file->wrtPending); |
612 | return pntsd; | ||
613 | } | ||
632 | 614 | ||
633 | sb = inode->i_sb; | 615 | static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, |
634 | if (sb == NULL) | 616 | struct cifs_ntsd *pnntsd, u32 acllen) |
635 | return rc; | 617 | { |
618 | int xid, rc; | ||
636 | 619 | ||
637 | cifs_sb = CIFS_SB(sb); | ||
638 | xid = GetXid(); | 620 | xid = GetXid(); |
621 | rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); | ||
622 | FreeXid(xid); | ||
639 | 623 | ||
640 | open_file = find_readable_file(CIFS_I(inode)); | 624 | cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); |
641 | if (open_file) { | 625 | return rc; |
642 | unlock_file = true; | 626 | } |
643 | fid = open_file->netfid; | 627 | |
644 | } else { | 628 | static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, |
645 | int oplock = 0; | 629 | struct cifs_ntsd *pnntsd, u32 acllen) |
646 | /* open file */ | 630 | { |
647 | rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, | 631 | int oplock = 0; |
648 | WRITE_DAC, 0, &fid, &oplock, NULL, | 632 | int xid, rc; |
649 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 633 | __u16 fid; |
650 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 634 | |
651 | if (rc != 0) { | 635 | xid = GetXid(); |
652 | cERROR(1, ("Unable to open file to set ACL")); | 636 | |
653 | FreeXid(xid); | 637 | rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, WRITE_DAC, 0, |
654 | return rc; | 638 | &fid, &oplock, NULL, cifs_sb->local_nls, |
655 | } | 639 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
640 | if (rc) { | ||
641 | cERROR(1, ("Unable to open file to set ACL")); | ||
642 | goto out; | ||
656 | } | 643 | } |
657 | 644 | ||
658 | rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); | 645 | rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); |
659 | cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); | 646 | cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); |
660 | if (unlock_file) | ||
661 | atomic_dec(&open_file->wrtPending); | ||
662 | else | ||
663 | CIFSSMBClose(xid, cifs_sb->tcon, fid); | ||
664 | 647 | ||
648 | CIFSSMBClose(xid, cifs_sb->tcon, fid); | ||
649 | out: | ||
665 | FreeXid(xid); | 650 | FreeXid(xid); |
651 | return rc; | ||
652 | } | ||
666 | 653 | ||
654 | /* Set an ACL on the server */ | ||
655 | static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | ||
656 | struct inode *inode, const char *path) | ||
657 | { | ||
658 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
659 | struct cifsFileInfo *open_file; | ||
660 | int rc; | ||
661 | |||
662 | cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); | ||
663 | |||
664 | open_file = find_readable_file(CIFS_I(inode)); | ||
665 | if (!open_file) | ||
666 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); | ||
667 | |||
668 | rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); | ||
669 | atomic_dec(&open_file->wrtPending); | ||
667 | return rc; | 670 | return rc; |
668 | } | 671 | } |
669 | 672 | ||
670 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ | 673 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ |
671 | void acl_to_uid_mode(struct inode *inode, const char *path, const __u16 *pfid) | 674 | void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, |
675 | const char *path, const __u16 *pfid) | ||
672 | { | 676 | { |
673 | struct cifs_ntsd *pntsd = NULL; | 677 | struct cifs_ntsd *pntsd = NULL; |
674 | u32 acllen = 0; | 678 | u32 acllen = 0; |
675 | int rc = 0; | 679 | int rc = 0; |
676 | 680 | ||
677 | cFYI(DBG2, ("converting ACL to mode for %s", path)); | 681 | cFYI(DBG2, ("converting ACL to mode for %s", path)); |
678 | pntsd = get_cifs_acl(&acllen, inode, path, pfid); | 682 | |
683 | if (pfid) | ||
684 | pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen); | ||
685 | else | ||
686 | pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen); | ||
679 | 687 | ||
680 | /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ | 688 | /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ |
681 | if (pntsd) | 689 | if (pntsd) |
@@ -698,7 +706,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) | |||
698 | cFYI(DBG2, ("set ACL from mode for %s", path)); | 706 | cFYI(DBG2, ("set ACL from mode for %s", path)); |
699 | 707 | ||
700 | /* Get the security descriptor */ | 708 | /* Get the security descriptor */ |
701 | pntsd = get_cifs_acl(&secdesclen, inode, path, NULL); | 709 | pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); |
702 | 710 | ||
703 | /* Add three ACEs for owner, group, everyone getting rid of | 711 | /* Add three ACEs for owner, group, everyone getting rid of |
704 | other ACEs as chmod disables ACEs and set the security descriptor */ | 712 | other ACEs as chmod disables ACEs and set the security descriptor */ |
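The old get/set ACL routines mixed handle lookup, path-based open, and the wire call in one body; they are now split into *_by_fid and *_by_path workers plus thin dispatchers that prefer an already-open handle and fall back to open/operate/close by path. The get-side dispatcher as it reads after this patch, consolidated here for reference:

	static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
					      struct inode *inode,
					      const char *path, u32 *pacllen)
	{
		struct cifsFileInfo *open_file = NULL;
		struct cifs_ntsd *pntsd;

		if (inode)
			open_file = find_readable_file(CIFS_I(inode));
		if (!open_file)	/* no cached handle: open by path instead */
			return get_cifs_acl_by_path(cifs_sb, path, pacllen);

		pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen);
		atomic_dec(&open_file->wrtPending);	/* ref from find_readable_file() */
		return pntsd;
	}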
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5e6d35804d73..0d92114195ab 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -146,7 +146,7 @@ cifs_read_super(struct super_block *sb, void *data, | |||
146 | #endif | 146 | #endif |
147 | sb->s_blocksize = CIFS_MAX_MSGSIZE; | 147 | sb->s_blocksize = CIFS_MAX_MSGSIZE; |
148 | sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ | 148 | sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ |
149 | inode = cifs_iget(sb, ROOT_I); | 149 | inode = cifs_root_iget(sb, ROOT_I); |
150 | 150 | ||
151 | if (IS_ERR(inode)) { | 151 | if (IS_ERR(inode)) { |
152 | rc = PTR_ERR(inode); | 152 | rc = PTR_ERR(inode); |
@@ -204,6 +204,9 @@ cifs_put_super(struct super_block *sb) | |||
204 | cFYI(1, ("Empty cifs superblock info passed to unmount")); | 204 | cFYI(1, ("Empty cifs superblock info passed to unmount")); |
205 | return; | 205 | return; |
206 | } | 206 | } |
207 | |||
208 | lock_kernel(); | ||
209 | |||
207 | rc = cifs_umount(sb, cifs_sb); | 210 | rc = cifs_umount(sb, cifs_sb); |
208 | if (rc) | 211 | if (rc) |
209 | cERROR(1, ("cifs_umount failed with return code %d", rc)); | 212 | cERROR(1, ("cifs_umount failed with return code %d", rc)); |
@@ -216,7 +219,8 @@ cifs_put_super(struct super_block *sb) | |||
216 | 219 | ||
217 | unload_nls(cifs_sb->local_nls); | 220 | unload_nls(cifs_sb->local_nls); |
218 | kfree(cifs_sb); | 221 | kfree(cifs_sb); |
219 | return; | 222 | |
223 | unlock_kernel(); | ||
220 | } | 224 | } |
221 | 225 | ||
222 | static int | 226 | static int |
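The lock_kernel()/unlock_kernel() pair added to cifs_put_super() follows the BKL pushdown pattern: the VFS stops taking the big kernel lock around ->put_super, so each filesystem that may still depend on it takes it locally until it can be shown unnecessary. The shape, sketched (function name is ours):

	static void example_put_super(struct super_block *sb)
	{
		lock_kernel();	/* previously taken by the VFS caller */
		/* ... filesystem-specific teardown ... */
		unlock_kernel();
	}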
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 051b71cfdea9..9570a0e8023f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -36,7 +36,7 @@ extern void cifs_read_inode(struct inode *); | |||
36 | 36 | ||
37 | /* Functions related to inodes */ | 37 | /* Functions related to inodes */ |
38 | extern const struct inode_operations cifs_dir_inode_ops; | 38 | extern const struct inode_operations cifs_dir_inode_ops; |
39 | extern struct inode *cifs_iget(struct super_block *, unsigned long); | 39 | extern struct inode *cifs_root_iget(struct super_block *, unsigned long); |
40 | extern int cifs_create(struct inode *, struct dentry *, int, | 40 | extern int cifs_create(struct inode *, struct dentry *, int, |
41 | struct nameidata *); | 41 | struct nameidata *); |
42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, | 42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, |
@@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
100 | extern const struct export_operations cifs_export_ops; | 100 | extern const struct export_operations cifs_export_ops; |
101 | #endif /* EXPERIMENTAL */ | 101 | #endif /* EXPERIMENTAL */ |
102 | 102 | ||
103 | #define CIFS_VERSION "1.58" | 103 | #define CIFS_VERSION "1.59" |
104 | #endif /* _CIFSFS_H */ | 104 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index fae083930eee..f9452329bcce 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -90,10 +90,10 @@ extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16, | |||
90 | struct cifsTconInfo *); | 90 | struct cifsTconInfo *); |
91 | extern void DeleteOplockQEntry(struct oplock_q_entry *); | 91 | extern void DeleteOplockQEntry(struct oplock_q_entry *); |
92 | extern void DeleteTconOplockQEntries(struct cifsTconInfo *); | 92 | extern void DeleteTconOplockQEntries(struct cifsTconInfo *); |
93 | extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601); | 93 | extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); |
94 | extern u64 cifs_UnixTimeToNT(struct timespec); | 94 | extern u64 cifs_UnixTimeToNT(struct timespec); |
95 | extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); | 95 | extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, |
96 | extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); | 96 | int offset); |
97 | 97 | ||
98 | extern int cifs_posix_open(char *full_path, struct inode **pinode, | 98 | extern int cifs_posix_open(char *full_path, struct inode **pinode, |
99 | struct super_block *sb, int mode, int oflags, | 99 | struct super_block *sb, int mode, int oflags, |
@@ -108,8 +108,8 @@ extern int cifs_get_inode_info(struct inode **pinode, | |||
108 | extern int cifs_get_inode_info_unix(struct inode **pinode, | 108 | extern int cifs_get_inode_info_unix(struct inode **pinode, |
109 | const unsigned char *search_path, | 109 | const unsigned char *search_path, |
110 | struct super_block *sb, int xid); | 110 | struct super_block *sb, int xid); |
111 | extern void acl_to_uid_mode(struct inode *inode, const char *path, | 111 | extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, |
112 | const __u16 *pfid); | 112 | const char *path, const __u16 *pfid); |
113 | extern int mode_to_acl(struct inode *inode, const char *path, __u64); | 113 | extern int mode_to_acl(struct inode *inode, const char *path, __u64); |
114 | 114 | ||
115 | extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, | 115 | extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index d06260251c30..b84c61d5bca4 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -524,8 +524,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
524 | int val, seconds, remain, result; | 524 | int val, seconds, remain, result; |
525 | struct timespec ts, utc; | 525 | struct timespec ts, utc; |
526 | utc = CURRENT_TIME; | 526 | utc = CURRENT_TIME; |
527 | ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date), | 527 | ts = cnvrtDosUnixTm(rsp->SrvTime.Date, |
528 | le16_to_cpu(rsp->SrvTime.Time)); | 528 | rsp->SrvTime.Time, 0); |
529 | cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d", | 529 | cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d", |
530 | (int)ts.tv_sec, (int)utc.tv_sec, | 530 | (int)ts.tv_sec, (int)utc.tv_sec, |
531 | (int)(utc.tv_sec - ts.tv_sec))); | 531 | (int)(utc.tv_sec - ts.tv_sec))); |
@@ -2427,8 +2427,7 @@ querySymLinkRetry: | |||
2427 | params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; | 2427 | params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; |
2428 | pSMB->TotalDataCount = 0; | 2428 | pSMB->TotalDataCount = 0; |
2429 | pSMB->MaxParameterCount = cpu_to_le16(2); | 2429 | pSMB->MaxParameterCount = cpu_to_le16(2); |
2430 | /* BB find exact max data count below from sess structure BB */ | 2430 | pSMB->MaxDataCount = cpu_to_le16(CIFSMaxBufSize); |
2431 | pSMB->MaxDataCount = cpu_to_le16(4000); | ||
2432 | pSMB->MaxSetupCount = 0; | 2431 | pSMB->MaxSetupCount = 0; |
2433 | pSMB->Reserved = 0; | 2432 | pSMB->Reserved = 0; |
2434 | pSMB->Flags = 0; | 2433 | pSMB->Flags = 0; |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4aa81a507b74..97f4311b9a8e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/namei.h> | 35 | #include <linux/namei.h> |
36 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
37 | #include <asm/processor.h> | 37 | #include <asm/processor.h> |
38 | #include <linux/inet.h> | ||
38 | #include <net/ipv6.h> | 39 | #include <net/ipv6.h> |
39 | #include "cifspdu.h" | 40 | #include "cifspdu.h" |
40 | #include "cifsglob.h" | 41 | #include "cifsglob.h" |
@@ -61,7 +62,6 @@ struct smb_vol { | |||
61 | char *domainname; | 62 | char *domainname; |
62 | char *UNC; | 63 | char *UNC; |
63 | char *UNCip; | 64 | char *UNCip; |
64 | char *in6_addr; /* ipv6 address as human readable form of in6_addr */ | ||
65 | char *iocharset; /* local code page for mapping to and from Unicode */ | 65 | char *iocharset; /* local code page for mapping to and from Unicode */ |
66 | char source_rfc1001_name[16]; /* netbios name of client */ | 66 | char source_rfc1001_name[16]; /* netbios name of client */ |
67 | char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ | 67 | char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ |
@@ -827,14 +827,16 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
827 | vol->target_rfc1001_name[0] = 0; | 827 | vol->target_rfc1001_name[0] = 0; |
828 | vol->linux_uid = current_uid(); /* use current_euid() instead? */ | 828 | vol->linux_uid = current_uid(); /* use current_euid() instead? */ |
829 | vol->linux_gid = current_gid(); | 829 | vol->linux_gid = current_gid(); |
830 | vol->dir_mode = S_IRWXUGO; | 830 | |
831 | /* 2767 perms indicate mandatory locking support */ | 831 | /* default to only allowing write access to owner of the mount */ |
832 | vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP); | 832 | vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; |
833 | 833 | ||
834 | /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ | 834 | /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ |
835 | vol->rw = true; | 835 | vol->rw = true; |
836 | /* default is always to request posix paths. */ | 836 | /* default is always to request posix paths. */ |
837 | vol->posix_paths = 1; | 837 | vol->posix_paths = 1; |
838 | /* default to using server inode numbers where available */ | ||
839 | vol->server_ino = 1; | ||
838 | 840 | ||
839 | if (!options) | 841 | if (!options) |
840 | return 1; | 842 | return 1; |
@@ -955,10 +957,12 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
955 | } | 957 | } |
956 | strcpy(vol->password, value); | 958 | strcpy(vol->password, value); |
957 | } | 959 | } |
958 | } else if (strnicmp(data, "ip", 2) == 0) { | 960 | } else if (!strnicmp(data, "ip", 2) || |
961 | !strnicmp(data, "addr", 4)) { | ||
959 | if (!value || !*value) { | 962 | if (!value || !*value) { |
960 | vol->UNCip = NULL; | 963 | vol->UNCip = NULL; |
961 | } else if (strnlen(value, 35) < 35) { | 964 | } else if (strnlen(value, INET6_ADDRSTRLEN) < |
965 | INET6_ADDRSTRLEN) { | ||
962 | vol->UNCip = value; | 966 | vol->UNCip = value; |
963 | } else { | 967 | } else { |
964 | printk(KERN_WARNING "CIFS: ip address " | 968 | printk(KERN_WARNING "CIFS: ip address " |
@@ -1092,17 +1096,17 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1092 | return 1; | 1096 | return 1; |
1093 | } | 1097 | } |
1094 | } else if (strnicmp(data, "uid", 3) == 0) { | 1098 | } else if (strnicmp(data, "uid", 3) == 0) { |
1095 | if (value && *value) { | 1099 | if (value && *value) |
1096 | vol->linux_uid = | 1100 | vol->linux_uid = |
1097 | simple_strtoul(value, &value, 0); | 1101 | simple_strtoul(value, &value, 0); |
1102 | } else if (strnicmp(data, "forceuid", 8) == 0) { | ||
1098 | vol->override_uid = 1; | 1103 | vol->override_uid = 1; |
1099 | } | ||
1100 | } else if (strnicmp(data, "gid", 3) == 0) { | 1104 | } else if (strnicmp(data, "gid", 3) == 0) { |
1101 | if (value && *value) { | 1105 | if (value && *value) |
1102 | vol->linux_gid = | 1106 | vol->linux_gid = |
1103 | simple_strtoul(value, &value, 0); | 1107 | simple_strtoul(value, &value, 0); |
1108 | } else if (strnicmp(data, "forcegid", 8) == 0) { | ||
1104 | vol->override_gid = 1; | 1109 | vol->override_gid = 1; |
1105 | } | ||
1106 | } else if (strnicmp(data, "file_mode", 4) == 0) { | 1110 | } else if (strnicmp(data, "file_mode", 4) == 0) { |
1107 | if (value && *value) { | 1111 | if (value && *value) { |
1108 | vol->file_mode = | 1112 | vol->file_mode = |
@@ -1315,16 +1319,6 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1315 | vol->direct_io = 1; | 1319 | vol->direct_io = 1; |
1316 | } else if (strnicmp(data, "forcedirectio", 13) == 0) { | 1320 | } else if (strnicmp(data, "forcedirectio", 13) == 0) { |
1317 | vol->direct_io = 1; | 1321 | vol->direct_io = 1; |
1318 | } else if (strnicmp(data, "in6_addr", 8) == 0) { | ||
1319 | if (!value || !*value) { | ||
1320 | vol->in6_addr = NULL; | ||
1321 | } else if (strnlen(value, 49) == 48) { | ||
1322 | vol->in6_addr = value; | ||
1323 | } else { | ||
1324 | printk(KERN_WARNING "CIFS: ip v6 address not " | ||
1325 | "48 characters long\n"); | ||
1326 | return 1; | ||
1327 | } | ||
1328 | } else if (strnicmp(data, "noac", 4) == 0) { | 1322 | } else if (strnicmp(data, "noac", 4) == 0) { |
1329 | printk(KERN_WARNING "CIFS: Mount option noac not " | 1323 | printk(KERN_WARNING "CIFS: Mount option noac not " |
1330 | "supported. Instead set " | 1324 | "supported. Instead set " |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 302ea15f02e6..06866841b97f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -241,7 +241,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, | |||
241 | /* BB need same check in cifs_create too? */ | 241 | /* BB need same check in cifs_create too? */ |
242 | /* if not oplocked, invalidate inode pages if mtime or file | 242 | /* if not oplocked, invalidate inode pages if mtime or file |
243 | size changed */ | 243 | size changed */ |
244 | temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime)); | 244 | temp = cifs_NTtimeToUnix(buf->LastWriteTime); |
245 | if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && | 245 | if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && |
246 | (file->f_path.dentry->d_inode->i_size == | 246 | (file->f_path.dentry->d_inode->i_size == |
247 | (loff_t)le64_to_cpu(buf->EndOfFile))) { | 247 | (loff_t)le64_to_cpu(buf->EndOfFile))) { |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9c869a6dcba1..fad882b075ba 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -85,10 +85,10 @@ static void cifs_unix_info_to_inode(struct inode *inode, | |||
85 | __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes); | 85 | __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes); |
86 | __u64 end_of_file = le64_to_cpu(info->EndOfFile); | 86 | __u64 end_of_file = le64_to_cpu(info->EndOfFile); |
87 | 87 | ||
88 | inode->i_atime = cifs_NTtimeToUnix(le64_to_cpu(info->LastAccessTime)); | 88 | inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime); |
89 | inode->i_mtime = | 89 | inode->i_mtime = |
90 | cifs_NTtimeToUnix(le64_to_cpu(info->LastModificationTime)); | 90 | cifs_NTtimeToUnix(info->LastModificationTime); |
91 | inode->i_ctime = cifs_NTtimeToUnix(le64_to_cpu(info->LastStatusChange)); | 91 | inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange); |
92 | inode->i_mode = le64_to_cpu(info->Permissions); | 92 | inode->i_mode = le64_to_cpu(info->Permissions); |
93 | 93 | ||
94 | /* | 94 | /* |
@@ -554,14 +554,11 @@ int cifs_get_inode_info(struct inode **pinode, | |||
554 | 554 | ||
555 | /* Linux can not store file creation time so ignore it */ | 555 | /* Linux can not store file creation time so ignore it */ |
556 | if (pfindData->LastAccessTime) | 556 | if (pfindData->LastAccessTime) |
557 | inode->i_atime = cifs_NTtimeToUnix | 557 | inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime); |
558 | (le64_to_cpu(pfindData->LastAccessTime)); | ||
559 | else /* do not need to use current_fs_time - time not stored */ | 558 | else /* do not need to use current_fs_time - time not stored */ |
560 | inode->i_atime = CURRENT_TIME; | 559 | inode->i_atime = CURRENT_TIME; |
561 | inode->i_mtime = | 560 | inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime); |
562 | cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); | 561 | inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime); |
563 | inode->i_ctime = | ||
564 | cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); | ||
565 | cFYI(DBG2, ("Attributes came in as 0x%x", attr)); | 562 | cFYI(DBG2, ("Attributes came in as 0x%x", attr)); |
566 | if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { | 563 | if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { |
567 | inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; | 564 | inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; |
@@ -629,7 +626,7 @@ int cifs_get_inode_info(struct inode **pinode, | |||
629 | /* fill in 0777 bits from ACL */ | 626 | /* fill in 0777 bits from ACL */ |
630 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { | 627 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { |
631 | cFYI(1, ("Getting mode bits from ACL")); | 628 | cFYI(1, ("Getting mode bits from ACL")); |
632 | acl_to_uid_mode(inode, full_path, pfid); | 629 | acl_to_uid_mode(cifs_sb, inode, full_path, pfid); |
633 | } | 630 | } |
634 | #endif | 631 | #endif |
635 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { | 632 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { |
@@ -699,7 +696,7 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) | |||
699 | } | 696 | } |
700 | 697 | ||
701 | /* gets root inode */ | 698 | /* gets root inode */ |
702 | struct inode *cifs_iget(struct super_block *sb, unsigned long ino) | 699 | struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) |
703 | { | 700 | { |
704 | int xid; | 701 | int xid; |
705 | struct cifs_sb_info *cifs_sb; | 702 | struct cifs_sb_info *cifs_sb; |
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index e2fe998989a3..32d6baa0a54f 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
@@ -853,12 +853,12 @@ smbCalcSize_LE(struct smb_hdr *ptr) | |||
853 | 853 | ||
854 | #define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000) | 854 | #define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000) |
855 | 855 | ||
856 | /* | 856 | /* |
857 | * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units) | 857 | * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units) |
858 | * into Unix UTC (based 1970-01-01, in seconds). | 858 | * into Unix UTC (based 1970-01-01, in seconds). |
859 | */ | 859 | */ |
860 | struct timespec | 860 | struct timespec |
861 | cifs_NTtimeToUnix(u64 ntutc) | 861 | cifs_NTtimeToUnix(__le64 ntutc) |
862 | { | 862 | { |
863 | struct timespec ts; | 863 | struct timespec ts; |
864 | /* BB what about the timezone? BB */ | 864 | /* BB what about the timezone? BB */ |
@@ -866,7 +866,7 @@ cifs_NTtimeToUnix(u64 ntutc) | |||
866 | /* Subtract the NTFS time offset, then convert to 1s intervals. */ | 866 | /* Subtract the NTFS time offset, then convert to 1s intervals. */ |
867 | u64 t; | 867 | u64 t; |
868 | 868 | ||
869 | t = ntutc - NTFS_TIME_OFFSET; | 869 | t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET; |
870 | ts.tv_nsec = do_div(t, 10000000) * 100; | 870 | ts.tv_nsec = do_div(t, 10000000) * 100; |
871 | ts.tv_sec = t; | 871 | ts.tv_sec = t; |
872 | return ts; | 872 | return ts; |
@@ -883,16 +883,12 @@ cifs_UnixTimeToNT(struct timespec t) | |||
883 | static int total_days_of_prev_months[] = | 883 | static int total_days_of_prev_months[] = |
884 | {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}; | 884 | {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}; |
885 | 885 | ||
886 | 886 | struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) | |
887 | __le64 cnvrtDosCifsTm(__u16 date, __u16 time) | ||
888 | { | ||
889 | return cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(date, time))); | ||
890 | } | ||
891 | |||
892 | struct timespec cnvrtDosUnixTm(__u16 date, __u16 time) | ||
893 | { | 887 | { |
894 | struct timespec ts; | 888 | struct timespec ts; |
895 | int sec, min, days, month, year; | 889 | int sec, min, days, month, year; |
890 | u16 date = le16_to_cpu(le_date); | ||
891 | u16 time = le16_to_cpu(le_time); | ||
896 | SMB_TIME *st = (SMB_TIME *)&time; | 892 | SMB_TIME *st = (SMB_TIME *)&time; |
897 | SMB_DATE *sd = (SMB_DATE *)&date; | 893 | SMB_DATE *sd = (SMB_DATE *)&date; |
898 | 894 | ||
@@ -933,7 +929,7 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time) | |||
933 | days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0); | 929 | days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0); |
934 | sec += 24 * 60 * 60 * days; | 930 | sec += 24 * 60 * 60 * days; |
935 | 931 | ||
936 | ts.tv_sec = sec; | 932 | ts.tv_sec = sec + offset; |
937 | 933 | ||
938 | /* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */ | 934 | /* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */ |
939 | 935 | ||
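The signature changes move the byte swapping inside the converters: cifs_NTtimeToUnix() now takes the on-the-wire __le64 directly, so a caller can no longer forget (or duplicate) the le64_to_cpu(), and cnvrtDosUnixTm() takes the little-endian date/time words plus a seconds offset that is folded into tv_sec. The NT-time body after this patch, restated:

	struct timespec cifs_NTtimeToUnix(__le64 ntutc)
	{
		struct timespec ts;
		u64 t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;

		ts.tv_nsec = do_div(t, 10000000) * 100;	/* remainder: 100 ns ticks */
		ts.tv_sec = t;
		return ts;
	}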
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 964e097c8203..86d0055dc529 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -115,17 +115,6 @@ construct_dentry(struct qstr *qstring, struct file *file, | |||
115 | return rc; | 115 | return rc; |
116 | } | 116 | } |
117 | 117 | ||
118 | static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode) | ||
119 | { | ||
120 | if ((tcon) && (tcon->ses) && (tcon->ses->server)) { | ||
121 | inode->i_ctime.tv_sec += tcon->ses->server->timeAdj; | ||
122 | inode->i_mtime.tv_sec += tcon->ses->server->timeAdj; | ||
123 | inode->i_atime.tv_sec += tcon->ses->server->timeAdj; | ||
124 | } | ||
125 | return; | ||
126 | } | ||
127 | |||
128 | |||
129 | static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, | 118 | static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, |
130 | char *buf, unsigned int *pobject_type, int isNewInode) | 119 | char *buf, unsigned int *pobject_type, int isNewInode) |
131 | { | 120 | { |
@@ -150,26 +139,25 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, | |||
150 | allocation_size = le64_to_cpu(pfindData->AllocationSize); | 139 | allocation_size = le64_to_cpu(pfindData->AllocationSize); |
151 | end_of_file = le64_to_cpu(pfindData->EndOfFile); | 140 | end_of_file = le64_to_cpu(pfindData->EndOfFile); |
152 | tmp_inode->i_atime = | 141 | tmp_inode->i_atime = |
153 | cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); | 142 | cifs_NTtimeToUnix(pfindData->LastAccessTime); |
154 | tmp_inode->i_mtime = | 143 | tmp_inode->i_mtime = |
155 | cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); | 144 | cifs_NTtimeToUnix(pfindData->LastWriteTime); |
156 | tmp_inode->i_ctime = | 145 | tmp_inode->i_ctime = |
157 | cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); | 146 | cifs_NTtimeToUnix(pfindData->ChangeTime); |
158 | } else { /* legacy, OS2 and DOS style */ | 147 | } else { /* legacy, OS2 and DOS style */ |
159 | /* struct timespec ts;*/ | 148 | int offset = cifs_sb->tcon->ses->server->timeAdj; |
160 | FIND_FILE_STANDARD_INFO *pfindData = | 149 | FIND_FILE_STANDARD_INFO *pfindData = |
161 | (FIND_FILE_STANDARD_INFO *)buf; | 150 | (FIND_FILE_STANDARD_INFO *)buf; |
162 | 151 | ||
163 | tmp_inode->i_mtime = cnvrtDosUnixTm( | 152 | tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate, |
164 | le16_to_cpu(pfindData->LastWriteDate), | 153 | pfindData->LastWriteTime, |
165 | le16_to_cpu(pfindData->LastWriteTime)); | 154 | offset); |
166 | tmp_inode->i_atime = cnvrtDosUnixTm( | 155 | tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate, |
167 | le16_to_cpu(pfindData->LastAccessDate), | 156 | pfindData->LastAccessTime, |
168 | le16_to_cpu(pfindData->LastAccessTime)); | 157 | offset); |
169 | tmp_inode->i_ctime = cnvrtDosUnixTm( | 158 | tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate, |
170 | le16_to_cpu(pfindData->LastWriteDate), | 159 | pfindData->LastWriteTime, |
171 | le16_to_cpu(pfindData->LastWriteTime)); | 160 | offset); |
172 | AdjustForTZ(cifs_sb->tcon, tmp_inode); | ||
173 | attr = le16_to_cpu(pfindData->Attributes); | 161 | attr = le16_to_cpu(pfindData->Attributes); |
174 | allocation_size = le32_to_cpu(pfindData->AllocationSize); | 162 | allocation_size = le32_to_cpu(pfindData->AllocationSize); |
175 | end_of_file = le32_to_cpu(pfindData->DataSize); | 163 | end_of_file = le32_to_cpu(pfindData->DataSize); |
@@ -331,11 +319,11 @@ static void unix_fill_in_inode(struct inode *tmp_inode, | |||
331 | local_size = tmp_inode->i_size; | 319 | local_size = tmp_inode->i_size; |
332 | 320 | ||
333 | tmp_inode->i_atime = | 321 | tmp_inode->i_atime = |
334 | cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); | 322 | cifs_NTtimeToUnix(pfindData->LastAccessTime); |
335 | tmp_inode->i_mtime = | 323 | tmp_inode->i_mtime = |
336 | cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastModificationTime)); | 324 | cifs_NTtimeToUnix(pfindData->LastModificationTime); |
337 | tmp_inode->i_ctime = | 325 | tmp_inode->i_ctime = |
338 | cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastStatusChange)); | 326 | cifs_NTtimeToUnix(pfindData->LastStatusChange); |
339 | 327 | ||
340 | tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions); | 328 | tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions); |
341 | /* since we set the inode type below we need to mask off type | 329 | /* since we set the inode type below we need to mask off type |
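AdjustForTZ() disappears because the legacy (DOS/OS2) branch of fill_in_inode() can now fold the server's timezone delta straight into cnvrtDosUnixTm() rather than patching the inode timestamps afterwards. The call shape after the patch, restated:

	int offset = cifs_sb->tcon->ses->server->timeAdj;

	tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate,
					    pfindData->LastWriteTime, offset);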
diff --git a/fs/coda/file.c b/fs/coda/file.c index 6a347fbc998a..ffd42815fda1 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -47,6 +47,8 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos, | |||
47 | struct pipe_inode_info *pipe, size_t count, | 47 | struct pipe_inode_info *pipe, size_t count, |
48 | unsigned int flags) | 48 | unsigned int flags) |
49 | { | 49 | { |
50 | ssize_t (*splice_read)(struct file *, loff_t *, | ||
51 | struct pipe_inode_info *, size_t, unsigned int); | ||
50 | struct coda_file_info *cfi; | 52 | struct coda_file_info *cfi; |
51 | struct file *host_file; | 53 | struct file *host_file; |
52 | 54 | ||
@@ -54,10 +56,11 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos, | |||
54 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); | 56 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); |
55 | host_file = cfi->cfi_container; | 57 | host_file = cfi->cfi_container; |
56 | 58 | ||
57 | if (!host_file->f_op || !host_file->f_op->splice_read) | 59 | splice_read = host_file->f_op->splice_read; |
58 | return -EINVAL; | 60 | if (!splice_read) |
61 | splice_read = default_file_splice_read; | ||
59 | 62 | ||
60 | return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags); | 63 | return splice_read(host_file, ppos, pipe, count, flags); |
61 | } | 64 | } |
62 | 65 | ||
63 | static ssize_t | 66 | static ssize_t |
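Rather than refusing splice with -EINVAL when the underlying host file has no ->splice_read, coda now substitutes default_file_splice_read(), so the operation degrades gracefully. The fallback idiom inside coda_file_splice_read(), restated:

	ssize_t (*splice_read)(struct file *, loff_t *,
			       struct pipe_inode_info *, size_t, unsigned int);

	splice_read = host_file->f_op->splice_read;
	if (!splice_read)
		splice_read = default_file_splice_read;

	return splice_read(host_file, ppos, pipe, count, flags);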
diff --git a/fs/compat.c b/fs/compat.c index 681ed81e6be0..6aefb776dfeb 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -812,10 +812,8 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, | |||
812 | } | 812 | } |
813 | } | 813 | } |
814 | 814 | ||
815 | lock_kernel(); | ||
816 | retval = do_mount((char*)dev_page, dir_page, (char*)type_page, | 815 | retval = do_mount((char*)dev_page, dir_page, (char*)type_page, |
817 | flags, (void*)data_page); | 816 | flags, (void*)data_page); |
818 | unlock_kernel(); | ||
819 | 817 | ||
820 | out4: | 818 | out4: |
821 | free_page(data_page); | 819 | free_page(data_page); |
@@ -1488,7 +1486,7 @@ int compat_do_execve(char * filename, | |||
1488 | if (!bprm) | 1486 | if (!bprm) |
1489 | goto out_files; | 1487 | goto out_files; |
1490 | 1488 | ||
1491 | retval = mutex_lock_interruptible(&current->cred_exec_mutex); | 1489 | retval = mutex_lock_interruptible(&current->cred_guard_mutex); |
1492 | if (retval < 0) | 1490 | if (retval < 0) |
1493 | goto out_free; | 1491 | goto out_free; |
1494 | current->in_execve = 1; | 1492 | current->in_execve = 1; |
@@ -1550,7 +1548,7 @@ int compat_do_execve(char * filename, | |||
1550 | /* execve succeeded */ | 1548 | /* execve succeeded */ |
1551 | current->fs->in_exec = 0; | 1549 | current->fs->in_exec = 0; |
1552 | current->in_execve = 0; | 1550 | current->in_execve = 0; |
1553 | mutex_unlock(&current->cred_exec_mutex); | 1551 | mutex_unlock(&current->cred_guard_mutex); |
1554 | acct_update_integrals(current); | 1552 | acct_update_integrals(current); |
1555 | free_bprm(bprm); | 1553 | free_bprm(bprm); |
1556 | if (displaced) | 1554 | if (displaced) |
@@ -1573,7 +1571,7 @@ out_unmark: | |||
1573 | 1571 | ||
1574 | out_unlock: | 1572 | out_unlock: |
1575 | current->in_execve = 0; | 1573 | current->in_execve = 0; |
1576 | mutex_unlock(&current->cred_exec_mutex); | 1574 | mutex_unlock(&current->cred_guard_mutex); |
1577 | 1575 | ||
1578 | out_free: | 1576 | out_free: |
1579 | free_bprm(bprm); | 1577 | free_bprm(bprm); |
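Two independent cleanups land in compat.c: do_mount() is no longer wrapped in the big kernel lock (it does its own locking internally), and cred_exec_mutex is renamed cred_guard_mutex, a rename that (as we read it) reflects that the mutex guards credential state more broadly than exec alone, e.g. against ptrace racing an exec. The guard idiom is unchanged apart from the name:

	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
	if (retval < 0)
		goto out_free;
	/* ... prepare and install new credentials ... */
	mutex_unlock(&current->cred_guard_mutex);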
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 762d287123ca..da6061a6df40 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h | |||
@@ -39,6 +39,9 @@ struct configfs_dirent { | |||
39 | umode_t s_mode; | 39 | umode_t s_mode; |
40 | struct dentry * s_dentry; | 40 | struct dentry * s_dentry; |
41 | struct iattr * s_iattr; | 41 | struct iattr * s_iattr; |
42 | #ifdef CONFIG_LOCKDEP | ||
43 | int s_depth; | ||
44 | #endif | ||
42 | }; | 45 | }; |
43 | 46 | ||
44 | #define CONFIGFS_ROOT 0x0001 | 47 | #define CONFIGFS_ROOT 0x0001 |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 05373db21a4e..8e48b52205aa 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -78,11 +78,97 @@ static const struct dentry_operations configfs_dentry_ops = { | |||
78 | .d_delete = configfs_d_delete, | 78 | .d_delete = configfs_d_delete, |
79 | }; | 79 | }; |
80 | 80 | ||
81 | #ifdef CONFIG_LOCKDEP | ||
82 | |||
83 | /* | ||
84 | * Helpers to make lockdep happy with our recursive locking of default groups' | ||
85 | * inodes (see configfs_attach_group() and configfs_detach_group()). | ||
86 | * We put default groups i_mutexes in separate classes according to their depth | ||
87 | * from the youngest non-default group ancestor. | ||
88 | * | ||
89 | * For a non-default group A having default groups A/B, A/C, and A/C/D, default | ||
90 | * groups A/B and A/C will have their inode's mutex in class | ||
91 | * default_group_class[0], and default group A/C/D will be in | ||
92 | * default_group_class[1]. | ||
93 | * | ||
94 | * The lock classes are declared and assigned in inode.c, according to the | ||
95 | * s_depth value. | ||
96 | * The s_depth value is initialized to -1, adjusted to >= 0 when attaching | ||
97 | * default groups, and reset to -1 when all default groups are attached. During | ||
98 | * attachment, if configfs_create() sees s_depth > 0, the lock class of the new | ||
99 | * inode's mutex is set to default_group_class[s_depth - 1]. | ||
100 | */ | ||
101 | |||
102 | static void configfs_init_dirent_depth(struct configfs_dirent *sd) | ||
103 | { | ||
104 | sd->s_depth = -1; | ||
105 | } | ||
106 | |||
107 | static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, | ||
108 | struct configfs_dirent *sd) | ||
109 | { | ||
110 | int parent_depth = parent_sd->s_depth; | ||
111 | |||
112 | if (parent_depth >= 0) | ||
113 | sd->s_depth = parent_depth + 1; | ||
114 | } | ||
115 | |||
116 | static void | ||
117 | configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) | ||
118 | { | ||
119 | /* | ||
120 | * item's i_mutex class is already setup, so s_depth is now only | ||
121 | * used to set new sub-directories s_depth, which is always done | ||
122 | * with item's i_mutex locked. | ||
123 | */ | ||
124 | /* | ||
125 | * sd->s_depth == -1 iff we are a non default group. | ||
126 | * else (we are a default group) sd->s_depth > 0 (see | ||
127 | * create_dir()). | ||
128 | */ | ||
129 | if (sd->s_depth == -1) | ||
130 | /* | ||
131 | * We are a non default group and we are going to create | ||
132 | * default groups. | ||
133 | */ | ||
134 | sd->s_depth = 0; | ||
135 | } | ||
136 | |||
137 | static void | ||
138 | configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) | ||
139 | { | ||
140 | /* We will not create default groups anymore. */ | ||
141 | sd->s_depth = -1; | ||
142 | } | ||
143 | |||
144 | #else /* CONFIG_LOCKDEP */ | ||
145 | |||
146 | static void configfs_init_dirent_depth(struct configfs_dirent *sd) | ||
147 | { | ||
148 | } | ||
149 | |||
150 | static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, | ||
151 | struct configfs_dirent *sd) | ||
152 | { | ||
153 | } | ||
154 | |||
155 | static void | ||
156 | configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) | ||
157 | { | ||
158 | } | ||
159 | |||
160 | static void | ||
161 | configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) | ||
162 | { | ||
163 | } | ||
164 | |||
165 | #endif /* CONFIG_LOCKDEP */ | ||
166 | |||
81 | /* | 167 | /* |
82 | * Allocates a new configfs_dirent and links it to the parent configfs_dirent | 168 | * Allocates a new configfs_dirent and links it to the parent configfs_dirent |
83 | */ | 169 | */ |
84 | static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd, | 170 | static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, |
85 | void * element) | 171 | void *element, int type) |
86 | { | 172 | { |
87 | struct configfs_dirent * sd; | 173 | struct configfs_dirent * sd; |
88 | 174 | ||
@@ -94,6 +180,8 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare | |||
94 | INIT_LIST_HEAD(&sd->s_links); | 180 | INIT_LIST_HEAD(&sd->s_links); |
95 | INIT_LIST_HEAD(&sd->s_children); | 181 | INIT_LIST_HEAD(&sd->s_children); |
96 | sd->s_element = element; | 182 | sd->s_element = element; |
183 | sd->s_type = type; | ||
184 | configfs_init_dirent_depth(sd); | ||
97 | spin_lock(&configfs_dirent_lock); | 185 | spin_lock(&configfs_dirent_lock); |
98 | if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { | 186 | if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { |
99 | spin_unlock(&configfs_dirent_lock); | 187 | spin_unlock(&configfs_dirent_lock); |
@@ -138,12 +226,11 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd, | |||
138 | { | 226 | { |
139 | struct configfs_dirent * sd; | 227 | struct configfs_dirent * sd; |
140 | 228 | ||
141 | sd = configfs_new_dirent(parent_sd, element); | 229 | sd = configfs_new_dirent(parent_sd, element, type); |
142 | if (IS_ERR(sd)) | 230 | if (IS_ERR(sd)) |
143 | return PTR_ERR(sd); | 231 | return PTR_ERR(sd); |
144 | 232 | ||
145 | sd->s_mode = mode; | 233 | sd->s_mode = mode; |
146 | sd->s_type = type; | ||
147 | sd->s_dentry = dentry; | 234 | sd->s_dentry = dentry; |
148 | if (dentry) { | 235 | if (dentry) { |
149 | dentry->d_fsdata = configfs_get(sd); | 236 | dentry->d_fsdata = configfs_get(sd); |
@@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p, | |||
187 | error = configfs_make_dirent(p->d_fsdata, d, k, mode, | 274 | error = configfs_make_dirent(p->d_fsdata, d, k, mode, |
188 | CONFIGFS_DIR | CONFIGFS_USET_CREATING); | 275 | CONFIGFS_DIR | CONFIGFS_USET_CREATING); |
189 | if (!error) { | 276 | if (!error) { |
277 | configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata); | ||
190 | error = configfs_create(d, mode, init_dir); | 278 | error = configfs_create(d, mode, init_dir); |
191 | if (!error) { | 279 | if (!error) { |
192 | inc_nlink(p->d_inode); | 280 | inc_nlink(p->d_inode); |
@@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item, | |||
789 | * error, as rmdir() would. | 877 | * error, as rmdir() would. |
790 | */ | 878 | */ |
791 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); | 879 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); |
880 | configfs_adjust_dir_dirent_depth_before_populate(sd); | ||
792 | ret = populate_groups(to_config_group(item)); | 881 | ret = populate_groups(to_config_group(item)); |
793 | if (ret) { | 882 | if (ret) { |
794 | configfs_detach_item(item); | 883 | configfs_detach_item(item); |
795 | dentry->d_inode->i_flags |= S_DEAD; | 884 | dentry->d_inode->i_flags |= S_DEAD; |
796 | } | 885 | } |
886 | configfs_adjust_dir_dirent_depth_after_populate(sd); | ||
797 | mutex_unlock(&dentry->d_inode->i_mutex); | 887 | mutex_unlock(&dentry->d_inode->i_mutex); |
798 | if (ret) | 888 | if (ret) |
799 | d_delete(dentry); | 889 | d_delete(dentry); |
@@ -916,11 +1006,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level) | |||
916 | * Note, btw, that this can be called at *any* time, even when a configfs | 1006 | * Note, btw, that this can be called at *any* time, even when a configfs |
917 | * subsystem isn't registered, or when configfs is loading or unloading. | 1007 | * subsystem isn't registered, or when configfs is loading or unloading. |
918 | * Just like configfs_register_subsystem(). So we take the same | 1008 | * Just like configfs_register_subsystem(). So we take the same |
919 | * precautions. We pin the filesystem. We lock each i_mutex _in_order_ | 1009 | * precautions. We pin the filesystem. We lock configfs_dirent_lock. |
920 | * on our way down the tree. If we can find the target item in the | 1010 | * If we can find the target item in the |
921 | * configfs tree, it must be part of the subsystem tree as well, so we | 1011 | * configfs tree, it must be part of the subsystem tree as well, so we |
922 | * do not need the subsystem semaphore. Holding the i_mutex chain locks | 1012 | * do not need the subsystem semaphore. Holding configfs_dirent_lock helps |
923 | out mkdir() and rmdir(), who might be racing us. | 1013 | lock out mkdir() and rmdir(), who might be racing us. |
924 | */ | 1014 | */ |
925 | 1015 | ||
926 | /* | 1016 | /* |
@@ -933,17 +1023,21 @@ static int configfs_dump(struct configfs_dirent *sd, int level) | |||
933 | * do that so we can unlock it if we find nothing. | 1023 | * do that so we can unlock it if we find nothing. |
934 | * | 1024 | * |
935 | * Here we do a depth-first search of the dentry hierarchy looking for | 1025 | * Here we do a depth-first search of the dentry hierarchy looking for |
936 | * our object. We take i_mutex on each step of the way down. IT IS | 1026 | * our object. |
937 | * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, | 1027 | * We deliberately ignore items tagged as dropping since they are virtually |
938 | * we'll drop the i_mutex. | 1028 | * dead, as well as items in the middle of attachment since they virtually |
1029 | * do not exist yet. This completes the locking out of racing mkdir() and | ||
1030 | * rmdir(). | ||
1031 | * Note: subdirectories in the middle of attachment start with s_type = | ||
1032 | * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When | ||
1033 | * CONFIGFS_USET_CREATING is set, we ignore the item. s_type is | ||
1034 | * actually set in configfs_new_dirent(), under configfs_dirent_lock. | ||
939 | * | 1035 | * |
940 | * If the target is not found, -ENOENT is bubbled up and we have released | 1036 | * If the target is not found, -ENOENT is bubbled up. |
941 | * all locks. If the target was found, the locks will be cleared by | ||
942 | * configfs_depend_rollback(). | ||
943 | * | 1037 | * |
944 | * This adds a requirement that all config_items be unique! | 1038 | * This adds a requirement that all config_items be unique! |
945 | * | 1039 | * |
946 | * This is recursive because the locking traversal is tricky. There isn't | 1040 | * This is recursive. There isn't |
947 | * much on the stack, though, so folks that need this function - be careful | 1041 | * much on the stack, though, so folks that need this function - be careful |
948 | * about your stack! Patches will be accepted to make it iterative. | 1042 | * about your stack! Patches will be accepted to make it iterative. |
949 | */ | 1043 | */ |
@@ -955,13 +1049,13 @@ static int configfs_depend_prep(struct dentry *origin, | |||
955 | 1049 | ||
956 | BUG_ON(!origin || !sd); | 1050 | BUG_ON(!origin || !sd); |
957 | 1051 | ||
958 | /* Lock this guy on the way down */ | ||
959 | mutex_lock(&sd->s_dentry->d_inode->i_mutex); | ||
960 | if (sd->s_element == target) /* Boo-yah */ | 1052 | if (sd->s_element == target) /* Boo-yah */ |
961 | goto out; | 1053 | goto out; |
962 | 1054 | ||
963 | list_for_each_entry(child_sd, &sd->s_children, s_sibling) { | 1055 | list_for_each_entry(child_sd, &sd->s_children, s_sibling) { |
964 | if (child_sd->s_type & CONFIGFS_DIR) { | 1056 | if ((child_sd->s_type & CONFIGFS_DIR) && |
1057 | !(child_sd->s_type & CONFIGFS_USET_DROPPING) && | ||
1058 | !(child_sd->s_type & CONFIGFS_USET_CREATING)) { | ||
965 | ret = configfs_depend_prep(child_sd->s_dentry, | 1059 | ret = configfs_depend_prep(child_sd->s_dentry, |
966 | target); | 1060 | target); |
967 | if (!ret) | 1061 | if (!ret) |
@@ -970,33 +1064,12 @@ static int configfs_depend_prep(struct dentry *origin, | |||
970 | } | 1064 | } |
971 | 1065 | ||
972 | /* We looped all our children and didn't find target */ | 1066 | /* We looped all our children and didn't find target */ |
973 | mutex_unlock(&sd->s_dentry->d_inode->i_mutex); | ||
974 | ret = -ENOENT; | 1067 | ret = -ENOENT; |
975 | 1068 | ||
976 | out: | 1069 | out: |
977 | return ret; | 1070 | return ret; |
978 | } | 1071 | } |
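The child scan above descends only into directories that are neither mid-creation nor mid-drop. A named predicate (hypothetical, not part of the patch) makes the filter explicit:

/* Hypothetical helper, not in the patch: the exact filter applied by
 * the list_for_each_entry() walk in configfs_depend_prep(). */
static inline int configfs_dirent_is_live_dir(struct configfs_dirent *sd)
{
	return (sd->s_type & CONFIGFS_DIR) &&
	       !(sd->s_type & CONFIGFS_USET_DROPPING) &&
	       !(sd->s_type & CONFIGFS_USET_CREATING);
}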
979 | 1072 | ||
980 | /* | ||
981 | * This is ONLY called if configfs_depend_prep() did its job. So we can | ||
982 | * trust the entire path from item back up to origin. | ||
983 | * | ||
984 | * We walk backwards from item, unlocking each i_mutex. We finish by | ||
985 | * unlocking origin. | ||
986 | */ | ||
987 | static void configfs_depend_rollback(struct dentry *origin, | ||
988 | struct config_item *item) | ||
989 | { | ||
990 | struct dentry *dentry = item->ci_dentry; | ||
991 | |||
992 | while (dentry != origin) { | ||
993 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
994 | dentry = dentry->d_parent; | ||
995 | } | ||
996 | |||
997 | mutex_unlock(&origin->d_inode->i_mutex); | ||
998 | } | ||
999 | |||
1000 | int configfs_depend_item(struct configfs_subsystem *subsys, | 1073 | int configfs_depend_item(struct configfs_subsystem *subsys, |
1001 | struct config_item *target) | 1074 | struct config_item *target) |
1002 | { | 1075 | { |
@@ -1037,17 +1110,21 @@ int configfs_depend_item(struct configfs_subsystem *subsys, | |||
1037 | 1110 | ||
1038 | /* Ok, now we can trust subsys/s_item */ | 1111 | /* Ok, now we can trust subsys/s_item */ |
1039 | 1112 | ||
1040 | /* Scan the tree, locking i_mutex recursively, return 0 if found */ | 1113 | spin_lock(&configfs_dirent_lock); |
1114 | /* Scan the tree, return 0 if found */ | ||
1041 | ret = configfs_depend_prep(subsys_sd->s_dentry, target); | 1115 | ret = configfs_depend_prep(subsys_sd->s_dentry, target); |
1042 | if (ret) | 1116 | if (ret) |
1043 | goto out_unlock_fs; | 1117 | goto out_unlock_dirent_lock; |
1044 | 1118 | ||
1045 | /* We hold all i_mutexes from the subsystem down to the target */ | 1119 | /* |
1120 | * We are sure that the item is not about to be removed by rmdir(), and | ||
1121 | * not in the middle of attachment by mkdir(). | ||
1122 | */ | ||
1046 | p = target->ci_dentry->d_fsdata; | 1123 | p = target->ci_dentry->d_fsdata; |
1047 | p->s_dependent_count += 1; | 1124 | p->s_dependent_count += 1; |
1048 | 1125 | ||
1049 | configfs_depend_rollback(subsys_sd->s_dentry, target); | 1126 | out_unlock_dirent_lock: |
1050 | 1127 | spin_unlock(&configfs_dirent_lock); | |
1051 | out_unlock_fs: | 1128 | out_unlock_fs: |
1052 | mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); | 1129 | mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); |
1053 | 1130 | ||
@@ -1072,10 +1149,10 @@ void configfs_undepend_item(struct configfs_subsystem *subsys, | |||
1072 | struct configfs_dirent *sd; | 1149 | struct configfs_dirent *sd; |
1073 | 1150 | ||
1074 | /* | 1151 | /* |
1075 | * Since we can trust everything is pinned, we just need i_mutex | 1152 | * Since we can trust everything is pinned, we just need |
1076 | * on the item. | 1153 | * configfs_dirent_lock. |
1077 | */ | 1154 | */ |
1078 | mutex_lock(&target->ci_dentry->d_inode->i_mutex); | 1155 | spin_lock(&configfs_dirent_lock); |
1079 | 1156 | ||
1080 | sd = target->ci_dentry->d_fsdata; | 1157 | sd = target->ci_dentry->d_fsdata; |
1081 | BUG_ON(sd->s_dependent_count < 1); | 1158 | BUG_ON(sd->s_dependent_count < 1); |
@@ -1086,7 +1163,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys, | |||
1086 | * After this unlock, we cannot trust the item to stay alive! | 1163 | * After this unlock, we cannot trust the item to stay alive! |
1087 | * DO NOT REFERENCE item after this unlock. | 1164 | * DO NOT REFERENCE item after this unlock. |
1088 | */ | 1165 | */ |
1089 | mutex_unlock(&target->ci_dentry->d_inode->i_mutex); | 1166 | spin_unlock(&configfs_dirent_lock); |
1090 | } | 1167 | } |
1091 | EXPORT_SYMBOL(configfs_undepend_item); | 1168 | EXPORT_SYMBOL(configfs_undepend_item); |
1092 | 1169 | ||
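With both entry points now serialized on configfs_dirent_lock, a client subsystem can pin an item against rmdir() and release it again. A minimal sketch of a caller (illustrative, not taken from the patch):

#include <linux/configfs.h>

/* Illustrative only: pin 'item' so rmdir() on it returns -EBUSY,
 * do some work, then release the pin. */
static int pin_and_use(struct configfs_subsystem *subsys,
		       struct config_item *item)
{
	int ret = configfs_depend_item(subsys, item);
	if (ret)
		return ret;	/* item was dropping, attaching, or not found */

	/* ... userspace cannot remove the item in this window ... */

	configfs_undepend_item(subsys, item);
	return 0;
}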
@@ -1286,13 +1363,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1286 | if (sd->s_type & CONFIGFS_USET_DEFAULT) | 1363 | if (sd->s_type & CONFIGFS_USET_DEFAULT) |
1287 | return -EPERM; | 1364 | return -EPERM; |
1288 | 1365 | ||
1289 | /* | ||
1290 | * Here's where we check for dependents. We're protected by | ||
1291 | * i_mutex. | ||
1292 | */ | ||
1293 | if (sd->s_dependent_count) | ||
1294 | return -EBUSY; | ||
1295 | |||
1296 | /* Get a working ref until we have the child */ | 1366 | /* Get a working ref until we have the child */ |
1297 | parent_item = configfs_get_config_item(dentry->d_parent); | 1367 | parent_item = configfs_get_config_item(dentry->d_parent); |
1298 | subsys = to_config_group(parent_item)->cg_subsys; | 1368 | subsys = to_config_group(parent_item)->cg_subsys; |
@@ -1316,9 +1386,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1316 | 1386 | ||
1317 | mutex_lock(&configfs_symlink_mutex); | 1387 | mutex_lock(&configfs_symlink_mutex); |
1318 | spin_lock(&configfs_dirent_lock); | 1388 | spin_lock(&configfs_dirent_lock); |
1319 | ret = configfs_detach_prep(dentry, &wait_mutex); | 1389 | /* |
1320 | if (ret) | 1390 | * Here's where we check for dependents. We're protected by |
1321 | configfs_detach_rollback(dentry); | 1391 | * configfs_dirent_lock. |
1392 | * If there are no dependents, atomically tag the item as dropping. | ||
1393 | */ | ||
1394 | ret = sd->s_dependent_count ? -EBUSY : 0; | ||
1395 | if (!ret) { | ||
1396 | ret = configfs_detach_prep(dentry, &wait_mutex); | ||
1397 | if (ret) | ||
1398 | configfs_detach_rollback(dentry); | ||
1399 | } | ||
1322 | spin_unlock(&configfs_dirent_lock); | 1400 | spin_unlock(&configfs_dirent_lock); |
1323 | mutex_unlock(&configfs_symlink_mutex); | 1401 | mutex_unlock(&configfs_symlink_mutex); |
1324 | 1402 | ||
@@ -1429,7 +1507,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) | |||
1429 | */ | 1507 | */ |
1430 | err = -ENOENT; | 1508 | err = -ENOENT; |
1431 | if (configfs_dirent_is_ready(parent_sd)) { | 1509 | if (configfs_dirent_is_ready(parent_sd)) { |
1432 | file->private_data = configfs_new_dirent(parent_sd, NULL); | 1510 | file->private_data = configfs_new_dirent(parent_sd, NULL, 0); |
1433 | if (IS_ERR(file->private_data)) | 1511 | if (IS_ERR(file->private_data)) |
1434 | err = PTR_ERR(file->private_data); | 1512 | err = PTR_ERR(file->private_data); |
1435 | else | 1513 | else |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 5d349d38e056..4921e7426d95 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -33,10 +33,15 @@ | |||
33 | #include <linux/backing-dev.h> | 33 | #include <linux/backing-dev.h> |
34 | #include <linux/capability.h> | 34 | #include <linux/capability.h> |
35 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
36 | #include <linux/lockdep.h> | ||
36 | 37 | ||
37 | #include <linux/configfs.h> | 38 | #include <linux/configfs.h> |
38 | #include "configfs_internal.h" | 39 | #include "configfs_internal.h" |
39 | 40 | ||
41 | #ifdef CONFIG_LOCKDEP | ||
42 | static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; | ||
43 | #endif | ||
44 | |||
40 | extern struct super_block * configfs_sb; | 45 | extern struct super_block * configfs_sb; |
41 | 46 | ||
42 | static const struct address_space_operations configfs_aops = { | 47 | static const struct address_space_operations configfs_aops = { |
@@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) | |||
150 | return inode; | 155 | return inode; |
151 | } | 156 | } |
152 | 157 | ||
158 | #ifdef CONFIG_LOCKDEP | ||
159 | |||
160 | static void configfs_set_inode_lock_class(struct configfs_dirent *sd, | ||
161 | struct inode *inode) | ||
162 | { | ||
163 | int depth = sd->s_depth; | ||
164 | |||
165 | if (depth > 0) { | ||
166 | if (depth <= ARRAY_SIZE(default_group_class)) { | ||
167 | lockdep_set_class(&inode->i_mutex, | ||
168 | &default_group_class[depth - 1]); | ||
169 | } else { | ||
170 | /* | ||
171 | * In practice the maximum locking depth has already been | ||
172 | * reached at that point; just print an informational notice. | ||
173 | */ | ||
174 | printk(KERN_INFO "configfs: Too many levels of inodes" | ||
175 | " for the locking correctness validator.\n"); | ||
176 | printk(KERN_INFO "Spurious warnings may appear.\n"); | ||
177 | } | ||
178 | } | ||
179 | } | ||
180 | |||
181 | #else /* CONFIG_LOCKDEP */ | ||
182 | |||
183 | static void configfs_set_inode_lock_class(struct configfs_dirent *sd, | ||
184 | struct inode *inode) | ||
185 | { | ||
186 | } | ||
187 | |||
188 | #endif /* CONFIG_LOCKDEP */ | ||
189 | |||
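The per-depth key array exists because lockdep keys mutexes by class, not by instance: taking a child's i_mutex while holding the parent's looks like the same class acquired twice. A minimal sketch of the idea (illustrative names, not from the patch):

#include <linux/fs.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>

static struct lock_class_key level_key[2];	/* one class per tree depth */

/* Illustration only: with distinct classes per level, the nested
 * acquisition below is not reported as a self-deadlock. */
static void lock_parent_then_child(struct inode *parent, struct inode *child)
{
	lockdep_set_class(&parent->i_mutex, &level_key[0]);
	lockdep_set_class(&child->i_mutex, &level_key[1]);

	mutex_lock(&parent->i_mutex);	/* class level_key[0] */
	mutex_lock(&child->i_mutex);	/* class level_key[1] */
	mutex_unlock(&child->i_mutex);
	mutex_unlock(&parent->i_mutex);
}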
153 | int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) | 190 | int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) |
154 | { | 191 | { |
155 | int error = 0; | 192 | int error = 0; |
@@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode * | |||
162 | struct inode *p_inode = dentry->d_parent->d_inode; | 199 | struct inode *p_inode = dentry->d_parent->d_inode; |
163 | p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; | 200 | p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; |
164 | } | 201 | } |
202 | configfs_set_inode_lock_class(sd, inode); | ||
165 | goto Proceed; | 203 | goto Proceed; |
166 | } | 204 | } |
167 | else | 205 | else |
diff --git a/fs/dcache.c b/fs/dcache.c index 75659a6fd1f8..9e5cd3c3a6ba 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root, | |||
1910 | 1910 | ||
1911 | spin_lock(&vfsmount_lock); | 1911 | spin_lock(&vfsmount_lock); |
1912 | prepend(&end, &buflen, "\0", 1); | 1912 | prepend(&end, &buflen, "\0", 1); |
1913 | if (!IS_ROOT(dentry) && d_unhashed(dentry) && | 1913 | if (d_unlinked(dentry) && |
1914 | (prepend(&end, &buflen, " (deleted)", 10) != 0)) | 1914 | (prepend(&end, &buflen, " (deleted)", 10) != 0)) |
1915 | goto Elong; | 1915 | goto Elong; |
1916 | 1916 | ||
@@ -2035,7 +2035,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) | |||
2035 | 2035 | ||
2036 | spin_lock(&dcache_lock); | 2036 | spin_lock(&dcache_lock); |
2037 | prepend(&end, &buflen, "\0", 1); | 2037 | prepend(&end, &buflen, "\0", 1); |
2038 | if (!IS_ROOT(dentry) && d_unhashed(dentry) && | 2038 | if (d_unlinked(dentry) && |
2039 | (prepend(&end, &buflen, "//deleted", 9) != 0)) | 2039 | (prepend(&end, &buflen, "//deleted", 9) != 0)) |
2040 | goto Elong; | 2040 | goto Elong; |
2041 | if (buflen < 1) | 2041 | if (buflen < 1) |
@@ -2097,9 +2097,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
2097 | read_unlock(¤t->fs->lock); | 2097 | read_unlock(¤t->fs->lock); |
2098 | 2098 | ||
2099 | error = -ENOENT; | 2099 | error = -ENOENT; |
2100 | /* Has the current directory has been unlinked? */ | ||
2101 | spin_lock(&dcache_lock); | 2100 | spin_lock(&dcache_lock); |
2102 | if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { | 2101 | if (!d_unlinked(pwd.dentry)) { |
2103 | unsigned long len; | 2102 | unsigned long len; |
2104 | struct path tmp = root; | 2103 | struct path tmp = root; |
2105 | char * cwd; | 2104 | char * cwd; |
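All three hunks substitute one helper for the same open-coded test; presumably d_unlinked() simply names it (a sketch, assuming it lives alongside d_unhashed() in linux/dcache.h):

static inline int d_unlinked(struct dentry *dentry)
{
	return d_unhashed(dentry) && !IS_ROOT(dentry);
}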
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c68edb969441..9b1d285f9fe6 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -557,8 +557,10 @@ static int __init init_devpts_fs(void) | |||
557 | int err = register_filesystem(&devpts_fs_type); | 557 | int err = register_filesystem(&devpts_fs_type); |
558 | if (!err) { | 558 | if (!err) { |
559 | devpts_mnt = kern_mount(&devpts_fs_type); | 559 | devpts_mnt = kern_mount(&devpts_fs_type); |
560 | if (IS_ERR(devpts_mnt)) | 560 | if (IS_ERR(devpts_mnt)) { |
561 | err = PTR_ERR(devpts_mnt); | 561 | err = PTR_ERR(devpts_mnt); |
562 | unregister_filesystem(&devpts_fs_type); | ||
563 | } | ||
562 | } | 564 | } |
563 | return err; | 565 | return err; |
564 | } | 566 | } |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 05763bbc2050..8b10b87dc01a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -1127,7 +1127,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1127 | rw = WRITE_ODIRECT; | 1127 | rw = WRITE_ODIRECT; |
1128 | 1128 | ||
1129 | if (bdev) | 1129 | if (bdev) |
1130 | bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); | 1130 | bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); |
1131 | 1131 | ||
1132 | if (offset & blocksize_mask) { | 1132 | if (offset & blocksize_mask) { |
1133 | if (bdev) | 1133 | if (bdev) |
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 858fba14aaa6..c4dfa1dcc86f 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c | |||
@@ -49,7 +49,8 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) | |||
49 | spin_unlock(&ls->ls_recover_list_lock); | 49 | spin_unlock(&ls->ls_recover_list_lock); |
50 | 50 | ||
51 | if (!found) | 51 | if (!found) |
52 | de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); | 52 | de = kzalloc(sizeof(struct dlm_direntry) + len, |
53 | ls->ls_allocation); | ||
53 | return de; | 54 | return de; |
54 | } | 55 | } |
55 | 56 | ||
@@ -211,7 +212,7 @@ int dlm_recover_directory(struct dlm_ls *ls) | |||
211 | 212 | ||
212 | dlm_dir_clear(ls); | 213 | dlm_dir_clear(ls); |
213 | 214 | ||
214 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); | 215 | last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation); |
215 | if (!last_name) | 216 | if (!last_name) |
216 | goto out; | 217 | goto out; |
217 | 218 | ||
@@ -322,7 +323,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
322 | if (namelen > DLM_RESNAME_MAXLEN) | 323 | if (namelen > DLM_RESNAME_MAXLEN) |
323 | return -EINVAL; | 324 | return -EINVAL; |
324 | 325 | ||
325 | de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); | 326 | de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation); |
326 | if (!de) | 327 | if (!de) |
327 | return -ENOMEM; | 328 | return -ENOMEM; |
328 | 329 | ||
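These GFP_KERNEL-to-ls_allocation swaps matter because a GFP_KERNEL allocation may recurse into filesystem writeback under memory pressure, and a DLM thread serving that same filesystem would deadlock there. A sketch of the assumed field semantics (inferred from the hunks, not stated in them):

#include <linux/slab.h>

/* Assumption: a lockspace backing a filesystem (e.g. GFS2/OCFS2)
 * carries GFP_NOFS in ls->ls_allocation, so allocations made while
 * servicing fs I/O cannot re-enter the filesystem via reclaim. */
static void *ls_alloc(struct dlm_ls *ls, size_t size)
{
	return kzalloc(size, ls->ls_allocation);
}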
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index cd8e2df3c295..d489fcc86713 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -384,7 +384,7 @@ static void threads_stop(void) | |||
384 | dlm_astd_stop(); | 384 | dlm_astd_stop(); |
385 | } | 385 | } |
386 | 386 | ||
387 | static int new_lockspace(char *name, int namelen, void **lockspace, | 387 | static int new_lockspace(const char *name, int namelen, void **lockspace, |
388 | uint32_t flags, int lvblen) | 388 | uint32_t flags, int lvblen) |
389 | { | 389 | { |
390 | struct dlm_ls *ls; | 390 | struct dlm_ls *ls; |
@@ -419,16 +419,14 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
419 | break; | 419 | break; |
420 | } | 420 | } |
421 | ls->ls_create_count++; | 421 | ls->ls_create_count++; |
422 | module_put(THIS_MODULE); | 422 | *lockspace = ls; |
423 | error = 1; /* not an error, return 0 */ | 423 | error = 1; |
424 | break; | 424 | break; |
425 | } | 425 | } |
426 | spin_unlock(&lslist_lock); | 426 | spin_unlock(&lslist_lock); |
427 | 427 | ||
428 | if (error < 0) | ||
429 | goto out; | ||
430 | if (error) | 428 | if (error) |
431 | goto ret_zero; | 429 | goto out; |
432 | 430 | ||
433 | error = -ENOMEM; | 431 | error = -ENOMEM; |
434 | 432 | ||
@@ -583,7 +581,6 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
583 | dlm_create_debug_file(ls); | 581 | dlm_create_debug_file(ls); |
584 | 582 | ||
585 | log_debug(ls, "join complete"); | 583 | log_debug(ls, "join complete"); |
586 | ret_zero: | ||
587 | *lockspace = ls; | 584 | *lockspace = ls; |
588 | return 0; | 585 | return 0; |
589 | 586 | ||
@@ -614,7 +611,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
614 | return error; | 611 | return error; |
615 | } | 612 | } |
616 | 613 | ||
617 | int dlm_new_lockspace(char *name, int namelen, void **lockspace, | 614 | int dlm_new_lockspace(const char *name, int namelen, void **lockspace, |
618 | uint32_t flags, int lvblen) | 615 | uint32_t flags, int lvblen) |
619 | { | 616 | { |
620 | int error = 0; | 617 | int error = 0; |
@@ -628,7 +625,9 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, | |||
628 | error = new_lockspace(name, namelen, lockspace, flags, lvblen); | 625 | error = new_lockspace(name, namelen, lockspace, flags, lvblen); |
629 | if (!error) | 626 | if (!error) |
630 | ls_count++; | 627 | ls_count++; |
631 | else if (!ls_count) | 628 | if (error > 0) |
629 | error = 0; | ||
630 | if (!ls_count) | ||
632 | threads_stop(); | 631 | threads_stop(); |
633 | out: | 632 | out: |
634 | mutex_unlock(&ls_lock); | 633 | mutex_unlock(&ls_lock); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 609108a83267..cdb580a9c7a2 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -309,6 +309,20 @@ static void lowcomms_state_change(struct sock *sk) | |||
309 | lowcomms_write_space(sk); | 309 | lowcomms_write_space(sk); |
310 | } | 310 | } |
311 | 311 | ||
312 | int dlm_lowcomms_connect_node(int nodeid) | ||
313 | { | ||
314 | struct connection *con; | ||
315 | |||
316 | if (nodeid == dlm_our_nodeid()) | ||
317 | return 0; | ||
318 | |||
319 | con = nodeid2con(nodeid, GFP_NOFS); | ||
320 | if (!con) | ||
321 | return -ENOMEM; | ||
322 | lowcomms_connect_sock(con); | ||
323 | return 0; | ||
324 | } | ||
325 | |||
312 | /* Make a socket active */ | 326 | /* Make a socket active */ |
313 | static int add_sock(struct socket *sock, struct connection *con) | 327 | static int add_sock(struct socket *sock, struct connection *con) |
314 | { | 328 | { |
@@ -486,7 +500,7 @@ static void process_sctp_notification(struct connection *con, | |||
486 | return; | 500 | return; |
487 | } | 501 | } |
488 | 502 | ||
489 | new_con = nodeid2con(nodeid, GFP_KERNEL); | 503 | new_con = nodeid2con(nodeid, GFP_NOFS); |
490 | if (!new_con) | 504 | if (!new_con) |
491 | return; | 505 | return; |
492 | 506 | ||
@@ -722,7 +736,7 @@ static int tcp_accept_from_sock(struct connection *con) | |||
722 | * the same time and the connections cross on the wire. | 736 | * the same time and the connections cross on the wire. |
723 | * In this case we store the incoming one in "othercon" | 737 | * In this case we store the incoming one in "othercon" |
724 | */ | 738 | */ |
725 | newcon = nodeid2con(nodeid, GFP_KERNEL); | 739 | newcon = nodeid2con(nodeid, GFP_NOFS); |
726 | if (!newcon) { | 740 | if (!newcon) { |
727 | result = -ENOMEM; | 741 | result = -ENOMEM; |
728 | goto accept_err; | 742 | goto accept_err; |
@@ -732,7 +746,7 @@ static int tcp_accept_from_sock(struct connection *con) | |||
732 | struct connection *othercon = newcon->othercon; | 746 | struct connection *othercon = newcon->othercon; |
733 | 747 | ||
734 | if (!othercon) { | 748 | if (!othercon) { |
735 | othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); | 749 | othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); |
736 | if (!othercon) { | 750 | if (!othercon) { |
737 | log_print("failed to allocate incoming socket"); | 751 | log_print("failed to allocate incoming socket"); |
738 | mutex_unlock(&newcon->sock_mutex); | 752 | mutex_unlock(&newcon->sock_mutex); |
@@ -1421,7 +1435,7 @@ static int work_start(void) | |||
1421 | static void stop_conn(struct connection *con) | 1435 | static void stop_conn(struct connection *con) |
1422 | { | 1436 | { |
1423 | con->flags |= 0x0F; | 1437 | con->flags |= 0x0F; |
1424 | if (con->sock) | 1438 | if (con->sock && con->sock->sk) |
1425 | con->sock->sk->sk_user_data = NULL; | 1439 | con->sock->sk->sk_user_data = NULL; |
1426 | } | 1440 | } |
1427 | 1441 | ||
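The new dlm_lowcomms_connect_node() export lets the membership code establish a node's TCP/SCTP link eagerly at join time instead of lazily on first message; the real caller is dlm_add_member() in fs/dlm/member.c further down. Shape of the call, mirroring that hunk:

/* Fragment matching the member.c change below: fail the member add
 * if the connection cannot even be allocated. */
error = dlm_lowcomms_connect_node(nodeid);
if (error < 0) {
	kfree(memb);
	return error;
}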
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index a9a9618c0d3f..1311e6426287 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -19,6 +19,7 @@ void dlm_lowcomms_stop(void); | |||
19 | int dlm_lowcomms_close(int nodeid); | 19 | int dlm_lowcomms_close(int nodeid); |
20 | void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); | 20 | void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); |
21 | void dlm_lowcomms_commit_buffer(void *mh); | 21 | void dlm_lowcomms_commit_buffer(void *mh); |
22 | int dlm_lowcomms_connect_node(int nodeid); | ||
22 | 23 | ||
23 | #endif /* __LOWCOMMS_DOT_H__ */ | 24 | #endif /* __LOWCOMMS_DOT_H__ */ |
24 | 25 | ||
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 26133f05ae3a..b128775913b2 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -17,6 +17,7 @@ | |||
17 | #include "recover.h" | 17 | #include "recover.h" |
18 | #include "rcom.h" | 18 | #include "rcom.h" |
19 | #include "config.h" | 19 | #include "config.h" |
20 | #include "lowcomms.h" | ||
20 | 21 | ||
21 | static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) | 22 | static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) |
22 | { | 23 | { |
@@ -45,9 +46,9 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) | |||
45 | static int dlm_add_member(struct dlm_ls *ls, int nodeid) | 46 | static int dlm_add_member(struct dlm_ls *ls, int nodeid) |
46 | { | 47 | { |
47 | struct dlm_member *memb; | 48 | struct dlm_member *memb; |
48 | int w; | 49 | int w, error; |
49 | 50 | ||
50 | memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); | 51 | memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); |
51 | if (!memb) | 52 | if (!memb) |
52 | return -ENOMEM; | 53 | return -ENOMEM; |
53 | 54 | ||
@@ -57,6 +58,12 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid) | |||
57 | return w; | 58 | return w; |
58 | } | 59 | } |
59 | 60 | ||
61 | error = dlm_lowcomms_connect_node(nodeid); | ||
62 | if (error < 0) { | ||
63 | kfree(memb); | ||
64 | return error; | ||
65 | } | ||
66 | |||
60 | memb->nodeid = nodeid; | 67 | memb->nodeid = nodeid; |
61 | memb->weight = w; | 68 | memb->weight = w; |
62 | add_ordered_member(ls, memb); | 69 | add_ordered_member(ls, memb); |
@@ -136,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls) | |||
136 | 143 | ||
137 | ls->ls_total_weight = total; | 144 | ls->ls_total_weight = total; |
138 | 145 | ||
139 | array = kmalloc(sizeof(int) * total, GFP_KERNEL); | 146 | array = kmalloc(sizeof(int) * total, ls->ls_allocation); |
140 | if (!array) | 147 | if (!array) |
141 | return; | 148 | return; |
142 | 149 | ||
@@ -219,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
219 | continue; | 226 | continue; |
220 | log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); | 227 | log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); |
221 | 228 | ||
222 | memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); | 229 | memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); |
223 | if (!memb) | 230 | if (!memb) |
224 | return -ENOMEM; | 231 | return -ENOMEM; |
225 | memb->nodeid = rv->new[i]; | 232 | memb->nodeid = rv->new[i]; |
@@ -334,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
334 | int *ids = NULL, *new = NULL; | 341 | int *ids = NULL, *new = NULL; |
335 | int error, ids_count = 0, new_count = 0; | 342 | int error, ids_count = 0, new_count = 0; |
336 | 343 | ||
337 | rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); | 344 | rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation); |
338 | if (!rv) | 345 | if (!rv) |
339 | return -ENOMEM; | 346 | return -ENOMEM; |
340 | 347 | ||
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index daa4183fbb84..7a2307c08911 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c | |||
@@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) | |||
35 | struct rq_entry *e; | 35 | struct rq_entry *e; |
36 | int length = ms->m_header.h_length - sizeof(struct dlm_message); | 36 | int length = ms->m_header.h_length - sizeof(struct dlm_message); |
37 | 37 | ||
38 | e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); | 38 | e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation); |
39 | if (!e) { | 39 | if (!e) { |
40 | log_print("dlm_add_requestqueue: out of memory len %d", length); | 40 | log_print("dlm_add_requestqueue: out of memory len %d", length); |
41 | return; | 41 | return; |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index fa4c7e7d15d9..12d649602d3a 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
28 | #include <linux/key.h> | 28 | #include <linux/key.h> |
29 | #include <linux/seq_file.h> | 29 | #include <linux/seq_file.h> |
30 | #include <linux/smp_lock.h> | ||
30 | #include <linux/file.h> | 31 | #include <linux/file.h> |
31 | #include <linux/crypto.h> | 32 | #include <linux/crypto.h> |
32 | #include "ecryptfs_kernel.h" | 33 | #include "ecryptfs_kernel.h" |
@@ -120,9 +121,13 @@ static void ecryptfs_put_super(struct super_block *sb) | |||
120 | { | 121 | { |
121 | struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); | 122 | struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); |
122 | 123 | ||
124 | lock_kernel(); | ||
125 | |||
123 | ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); | 126 | ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); |
124 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); | 127 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); |
125 | ecryptfs_set_superblock_private(sb, NULL); | 128 | ecryptfs_set_superblock_private(sb, NULL); |
129 | |||
130 | unlock_kernel(); | ||
126 | } | 131 | } |
127 | 132 | ||
128 | /** | 133 | /** |
diff --git a/fs/eventfd.c b/fs/eventfd.c index 2a701d593d35..3f0e1974abdc 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/anon_inodes.h> | 16 | #include <linux/anon_inodes.h> |
17 | #include <linux/eventfd.h> | 17 | #include <linux/eventfd.h> |
18 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
19 | #include <linux/module.h> | ||
19 | 20 | ||
20 | struct eventfd_ctx { | 21 | struct eventfd_ctx { |
21 | wait_queue_head_t wqh; | 22 | wait_queue_head_t wqh; |
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n) | |||
56 | 57 | ||
57 | return n; | 58 | return n; |
58 | } | 59 | } |
60 | EXPORT_SYMBOL_GPL(eventfd_signal); | ||
59 | 61 | ||
60 | static int eventfd_release(struct inode *inode, struct file *file) | 62 | static int eventfd_release(struct inode *inode, struct file *file) |
61 | { | 63 | { |
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd) | |||
197 | 199 | ||
198 | return file; | 200 | return file; |
199 | } | 201 | } |
202 | EXPORT_SYMBOL_GPL(eventfd_fget); | ||
200 | 203 | ||
201 | SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) | 204 | SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) |
202 | { | 205 | { |
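The two GPL exports (plus the module.h include) make eventfd usable from modules. A minimal sketch of a module-side consumer, with illustrative names:

#include <linux/err.h>
#include <linux/eventfd.h>
#include <linux/file.h>

/* Illustrative helper: resolve a userspace-supplied eventfd and
 * bump its counter by one, waking any poll()ers. */
static int kick_eventfd(int fd)
{
	struct file *file = eventfd_fget(fd);

	if (IS_ERR(file))
		return PTR_ERR(file);
	eventfd_signal(file, 1);
	fput(file);
	return 0;
}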
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/pagemap.h> | 35 | #include <linux/pagemap.h> |
36 | #include <linux/perf_counter.h> | ||
36 | #include <linux/highmem.h> | 37 | #include <linux/highmem.h> |
37 | #include <linux/spinlock.h> | 38 | #include <linux/spinlock.h> |
38 | #include <linux/key.h> | 39 | #include <linux/key.h> |
@@ -922,6 +923,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) | |||
922 | task_lock(tsk); | 923 | task_lock(tsk); |
923 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); | 924 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); |
924 | task_unlock(tsk); | 925 | task_unlock(tsk); |
926 | perf_counter_comm(tsk); | ||
925 | } | 927 | } |
926 | 928 | ||
927 | int flush_old_exec(struct linux_binprm * bprm) | 929 | int flush_old_exec(struct linux_binprm * bprm) |
@@ -990,6 +992,13 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
990 | 992 | ||
991 | current->personality &= ~bprm->per_clear; | 993 | current->personality &= ~bprm->per_clear; |
992 | 994 | ||
995 | /* | ||
996 | * Flush performance counters when crossing a | ||
997 | * security domain: | ||
998 | */ | ||
999 | if (!get_dumpable(current->mm)) | ||
1000 | perf_counter_exit_task(current); | ||
1001 | |||
993 | /* An exec changes our domain. We are no longer part of the thread | 1002 | /* An exec changes our domain. We are no longer part of the thread |
994 | group */ | 1003 | group */ |
995 | 1004 | ||
@@ -1016,7 +1025,7 @@ void install_exec_creds(struct linux_binprm *bprm) | |||
1016 | commit_creds(bprm->cred); | 1025 | commit_creds(bprm->cred); |
1017 | bprm->cred = NULL; | 1026 | bprm->cred = NULL; |
1018 | 1027 | ||
1019 | /* cred_exec_mutex must be held at least to this point to prevent | 1028 | /* cred_guard_mutex must be held at least to this point to prevent |
1020 | * ptrace_attach() from altering our determination of the task's | 1029 | * ptrace_attach() from altering our determination of the task's |
1021 | * credentials; any time after this it may be unlocked */ | 1030 | * credentials; any time after this it may be unlocked */ |
1022 | 1031 | ||
@@ -1026,7 +1035,7 @@ EXPORT_SYMBOL(install_exec_creds); | |||
1026 | 1035 | ||
1027 | /* | 1036 | /* |
1028 | * determine how safe it is to execute the proposed program | 1037 | * determine how safe it is to execute the proposed program |
1029 | * - the caller must hold current->cred_exec_mutex to protect against | 1038 | * - the caller must hold current->cred_guard_mutex to protect against |
1030 | * PTRACE_ATTACH | 1039 | * PTRACE_ATTACH |
1031 | */ | 1040 | */ |
1032 | int check_unsafe_exec(struct linux_binprm *bprm) | 1041 | int check_unsafe_exec(struct linux_binprm *bprm) |
@@ -1268,7 +1277,7 @@ int do_execve(char * filename, | |||
1268 | if (!bprm) | 1277 | if (!bprm) |
1269 | goto out_files; | 1278 | goto out_files; |
1270 | 1279 | ||
1271 | retval = mutex_lock_interruptible(¤t->cred_exec_mutex); | 1280 | retval = mutex_lock_interruptible(¤t->cred_guard_mutex); |
1272 | if (retval < 0) | 1281 | if (retval < 0) |
1273 | goto out_free; | 1282 | goto out_free; |
1274 | current->in_execve = 1; | 1283 | current->in_execve = 1; |
@@ -1331,7 +1340,7 @@ int do_execve(char * filename, | |||
1331 | /* execve succeeded */ | 1340 | /* execve succeeded */ |
1332 | current->fs->in_exec = 0; | 1341 | current->fs->in_exec = 0; |
1333 | current->in_execve = 0; | 1342 | current->in_execve = 0; |
1334 | mutex_unlock(¤t->cred_exec_mutex); | 1343 | mutex_unlock(¤t->cred_guard_mutex); |
1335 | acct_update_integrals(current); | 1344 | acct_update_integrals(current); |
1336 | free_bprm(bprm); | 1345 | free_bprm(bprm); |
1337 | if (displaced) | 1346 | if (displaced) |
@@ -1354,7 +1363,7 @@ out_unmark: | |||
1354 | 1363 | ||
1355 | out_unlock: | 1364 | out_unlock: |
1356 | current->in_execve = 0; | 1365 | current->in_execve = 0; |
1357 | mutex_unlock(¤t->cred_exec_mutex); | 1366 | mutex_unlock(¤t->cred_guard_mutex); |
1358 | 1367 | ||
1359 | out_free: | 1368 | out_free: |
1360 | free_bprm(bprm); | 1369 | free_bprm(bprm); |
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index b1512c4bb8c7..24667eedc023 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
@@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or, | |||
175 | 175 | ||
176 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); | 176 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); |
177 | 177 | ||
178 | int osd_req_read_kern(struct osd_request *or, | ||
179 | const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); | ||
180 | |||
181 | int osd_req_write_kern(struct osd_request *or, | ||
182 | const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); | ||
183 | |||
184 | #endif /*ifndef __EXOFS_COM_H__*/ | 178 | #endif /*ifndef __EXOFS_COM_H__*/ |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ba8d9fab4693..77d0a295eb1c 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | |||
59 | struct inode *inode) | 59 | struct inode *inode) |
60 | { | 60 | { |
61 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 61 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
62 | struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; | ||
63 | 62 | ||
64 | pcol->sbi = sbi; | 63 | pcol->sbi = sbi; |
65 | pcol->req_q = req_q; | 64 | pcol->req_q = osd_request_queue(sbi->s_dev); |
66 | pcol->inode = inode; | 65 | pcol->inode = inode; |
67 | pcol->expected_pages = expected_pages; | 66 | pcol->expected_pages = expected_pages; |
68 | 67 | ||
@@ -266,7 +265,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
266 | goto err; | 265 | goto err; |
267 | } | 266 | } |
268 | 267 | ||
269 | osd_req_read(or, &obj, pcol->bio, i_start); | 268 | osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); |
270 | 269 | ||
271 | if (is_sync) { | 270 | if (is_sync) { |
272 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | 271 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); |
@@ -522,7 +521,8 @@ static int write_exec(struct page_collect *pcol) | |||
522 | 521 | ||
523 | *pcol_copy = *pcol; | 522 | *pcol_copy = *pcol; |
524 | 523 | ||
525 | osd_req_write(or, &obj, pcol_copy->bio, i_start); | 524 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ |
525 | osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); | ||
526 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | 526 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); |
527 | if (unlikely(ret)) { | 527 | if (unlikely(ret)) { |
528 | EXOFS_ERR("write_exec: exofs_async_op() Failed\n"); | 528 | EXOFS_ERR("write_exec: exofs_async_op() Failed\n"); |
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index b249ae97fb15..b3d2ccb87aaa 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c | |||
@@ -50,10 +50,10 @@ int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) | |||
50 | 50 | ||
51 | /* FIXME: should be included in osd_sense_info */ | 51 | /* FIXME: should be included in osd_sense_info */ |
52 | if (in_resid) | 52 | if (in_resid) |
53 | *in_resid = or->in.req ? or->in.req->data_len : 0; | 53 | *in_resid = or->in.req ? or->in.req->resid_len : 0; |
54 | 54 | ||
55 | if (out_resid) | 55 | if (out_resid) |
56 | *out_resid = or->out.req ? or->out.req->data_len : 0; | 56 | *out_resid = or->out.req ? or->out.req->resid_len : 0; |
57 | 57 | ||
58 | return ret; | 58 | return ret; |
59 | } | 59 | } |
@@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | |||
125 | 125 | ||
126 | return -EIO; | 126 | return -EIO; |
127 | } | 127 | } |
128 | |||
129 | int osd_req_read_kern(struct osd_request *or, | ||
130 | const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) | ||
131 | { | ||
132 | struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; | ||
133 | struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); | ||
134 | |||
135 | if (!bio) | ||
136 | return -ENOMEM; | ||
137 | |||
138 | osd_req_read(or, obj, bio, offset); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | int osd_req_write_kern(struct osd_request *or, | ||
143 | const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) | ||
144 | { | ||
145 | struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; | ||
146 | struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); | ||
147 | |||
148 | if (!bio) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | osd_req_write(or, obj, bio, offset); | ||
152 | return 0; | ||
153 | } | ||
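The *_kern bounce helpers are deleted here rather than converted, which suggests the osd initiator library now supplies equivalents with the same signature. A sketch of a caller under that assumption:

/* Assumption: osd_req_write_kern() now comes from the osd library
 * with the signature the removed local helper had. */
int ret = osd_req_write_kern(or, &obj, offset, fscb, sizeof(*fscb));
if (ret)
	goto out;	/* -ENOMEM if the kernel buffer could not be mapped */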
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 9f1985e857e2..8216c5b77b53 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -200,20 +200,21 @@ static const struct export_operations exofs_export_ops; | |||
200 | /* | 200 | /* |
201 | * Write the superblock to the OSD | 201 | * Write the superblock to the OSD |
202 | */ | 202 | */ |
203 | static void exofs_write_super(struct super_block *sb) | 203 | static int exofs_sync_fs(struct super_block *sb, int wait) |
204 | { | 204 | { |
205 | struct exofs_sb_info *sbi; | 205 | struct exofs_sb_info *sbi; |
206 | struct exofs_fscb *fscb; | 206 | struct exofs_fscb *fscb; |
207 | struct osd_request *or; | 207 | struct osd_request *or; |
208 | struct osd_obj_id obj; | 208 | struct osd_obj_id obj; |
209 | int ret; | 209 | int ret = -ENOMEM; |
210 | 210 | ||
211 | fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); | 211 | fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); |
212 | if (!fscb) { | 212 | if (!fscb) { |
213 | EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); | 213 | EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); |
214 | return; | 214 | return -ENOMEM; |
215 | } | 215 | } |
216 | 216 | ||
217 | lock_super(sb); | ||
217 | lock_kernel(); | 218 | lock_kernel(); |
218 | sbi = sb->s_fs_info; | 219 | sbi = sb->s_fs_info; |
219 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | 220 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
@@ -246,7 +247,17 @@ out: | |||
246 | if (or) | 247 | if (or) |
247 | osd_end_request(or); | 248 | osd_end_request(or); |
248 | unlock_kernel(); | 249 | unlock_kernel(); |
250 | unlock_super(sb); | ||
249 | kfree(fscb); | 251 | kfree(fscb); |
252 | return ret; | ||
253 | } | ||
254 | |||
255 | static void exofs_write_super(struct super_block *sb) | ||
256 | { | ||
257 | if (!(sb->s_flags & MS_RDONLY)) | ||
258 | exofs_sync_fs(sb, 1); | ||
259 | else | ||
260 | sb->s_dirt = 0; | ||
250 | } | 261 | } |
251 | 262 | ||
252 | /* | 263 | /* |
@@ -258,6 +269,11 @@ static void exofs_put_super(struct super_block *sb) | |||
258 | int num_pend; | 269 | int num_pend; |
259 | struct exofs_sb_info *sbi = sb->s_fs_info; | 270 | struct exofs_sb_info *sbi = sb->s_fs_info; |
260 | 271 | ||
272 | lock_kernel(); | ||
273 | |||
274 | if (sb->s_dirt) | ||
275 | exofs_write_super(sb); | ||
276 | |||
261 | /* make sure there are no pending commands */ | 277 | /* make sure there are no pending commands */ |
262 | for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; | 278 | for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; |
263 | num_pend = atomic_read(&sbi->s_curr_pending)) { | 279 | num_pend = atomic_read(&sbi->s_curr_pending)) { |
@@ -271,6 +287,8 @@ static void exofs_put_super(struct super_block *sb) | |||
271 | osduld_put_device(sbi->s_dev); | 287 | osduld_put_device(sbi->s_dev); |
272 | kfree(sb->s_fs_info); | 288 | kfree(sb->s_fs_info); |
273 | sb->s_fs_info = NULL; | 289 | sb->s_fs_info = NULL; |
290 | |||
291 | unlock_kernel(); | ||
274 | } | 292 | } |
275 | 293 | ||
276 | /* | 294 | /* |
@@ -484,6 +502,7 @@ static const struct super_operations exofs_sops = { | |||
484 | .delete_inode = exofs_delete_inode, | 502 | .delete_inode = exofs_delete_inode, |
485 | .put_super = exofs_put_super, | 503 | .put_super = exofs_put_super, |
486 | .write_super = exofs_write_super, | 504 | .write_super = exofs_write_super, |
505 | .sync_fs = exofs_sync_fs, | ||
487 | .statfs = exofs_statfs, | 506 | .statfs = exofs_statfs, |
488 | }; | 507 | }; |
489 | 508 | ||
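The split mirrors the VFS contract: write_super is the periodic lazy flush, while sync_fs(sb, wait) is the explicit path from sync(2) and unmount; routing write_super through sync_fs(sb, 1) keeps one code path. Roughly (paraphrased VFS behaviour, not code from this patch):

/* Periodic writeback (lazy): */
if (sb->s_dirt && sb->s_op->write_super)
	sb->s_op->write_super(sb);

/* sync(2) / unmount (explicit): */
if (sb->s_op->sync_fs)
	sb->s_op->sync_fs(sb, wait);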
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index e0b2b43c1fdb..f42af45cfd88 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_EXT2_FS) += ext2.o | 5 | obj-$(CONFIG_EXT2_FS) += ext2.o |
6 | 6 | ||
7 | ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o | 8 | ioctl.o namei.o super.o symlink.o |
9 | 9 | ||
10 | ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 10 | ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 2999d72153b7..003500498c22 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
@@ -720,5 +720,5 @@ const struct file_operations ext2_dir_operations = { | |||
720 | #ifdef CONFIG_COMPAT | 720 | #ifdef CONFIG_COMPAT |
721 | .compat_ioctl = ext2_compat_ioctl, | 721 | .compat_ioctl = ext2_compat_ioctl, |
722 | #endif | 722 | #endif |
723 | .fsync = ext2_sync_file, | 723 | .fsync = simple_fsync, |
724 | }; | 724 | }; |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 3203042b36ef..b2bbf45039e0 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -113,9 +113,6 @@ extern int ext2_empty_dir (struct inode *); | |||
113 | extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); | 113 | extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); |
114 | extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); | 114 | extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); |
115 | 115 | ||
116 | /* fsync.c */ | ||
117 | extern int ext2_sync_file (struct file *, struct dentry *, int); | ||
118 | |||
119 | /* ialloc.c */ | 116 | /* ialloc.c */ |
120 | extern struct inode * ext2_new_inode (struct inode *, int); | 117 | extern struct inode * ext2_new_inode (struct inode *, int); |
121 | extern void ext2_free_inode (struct inode *); | 118 | extern void ext2_free_inode (struct inode *); |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 45ed07122182..2b9e47dc9222 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -55,7 +55,7 @@ const struct file_operations ext2_file_operations = { | |||
55 | .mmap = generic_file_mmap, | 55 | .mmap = generic_file_mmap, |
56 | .open = generic_file_open, | 56 | .open = generic_file_open, |
57 | .release = ext2_release_file, | 57 | .release = ext2_release_file, |
58 | .fsync = ext2_sync_file, | 58 | .fsync = simple_fsync, |
59 | .splice_read = generic_file_splice_read, | 59 | .splice_read = generic_file_splice_read, |
60 | .splice_write = generic_file_splice_write, | 60 | .splice_write = generic_file_splice_write, |
61 | }; | 61 | }; |
@@ -72,7 +72,7 @@ const struct file_operations ext2_xip_file_operations = { | |||
72 | .mmap = xip_file_mmap, | 72 | .mmap = xip_file_mmap, |
73 | .open = generic_file_open, | 73 | .open = generic_file_open, |
74 | .release = ext2_release_file, | 74 | .release = ext2_release_file, |
75 | .fsync = ext2_sync_file, | 75 | .fsync = simple_fsync, |
76 | }; | 76 | }; |
77 | #endif | 77 | #endif |
78 | 78 | ||
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c deleted file mode 100644 index fc66c93fcb5c..000000000000 --- a/fs/ext2/fsync.c +++ /dev/null | |||
@@ -1,50 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/fsync.c | ||
3 | * | ||
4 | * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) | ||
5 | * from | ||
6 | * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) | ||
7 | * Laboratoire MASI - Institut Blaise Pascal | ||
8 | * Universite Pierre et Marie Curie (Paris VI) | ||
9 | * from | ||
10 | * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds | ||
11 | * | ||
12 | * ext2fs fsync primitive | ||
13 | * | ||
14 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
15 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
16 | * | ||
17 | * Removed unnecessary code duplication for little endian machines | ||
18 | * and excessive __inline__s. | ||
19 | * Andi Kleen, 1997 | ||
20 | * | ||
21 | * Major simplications and cleanup - we only need to do the metadata, because | ||
22 | * we can depend on generic_block_fdatasync() to sync the data blocks. | ||
23 | */ | ||
24 | |||
25 | #include "ext2.h" | ||
26 | #include <linux/buffer_head.h> /* for sync_mapping_buffers() */ | ||
27 | |||
28 | |||
29 | /* | ||
30 | * File may be NULL when we are called. Perhaps we shouldn't | ||
31 | * even pass file to fsync ? | ||
32 | */ | ||
33 | |||
34 | int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync) | ||
35 | { | ||
36 | struct inode *inode = dentry->d_inode; | ||
37 | int err; | ||
38 | int ret; | ||
39 | |||
40 | ret = sync_mapping_buffers(inode->i_mapping); | ||
41 | if (!(inode->i_state & I_DIRTY)) | ||
42 | return ret; | ||
43 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
44 | return ret; | ||
45 | |||
46 | err = ext2_sync_inode(inode); | ||
47 | if (ret == 0) | ||
48 | ret = err; | ||
49 | return ret; | ||
50 | } | ||
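The deleted routine survives in generic form: the file_operations above now point at simple_fsync(), which does the same metadata-only sync with ext2_sync_inode() generalized to sync_inode(). Roughly (an approximation of the fs/libfs.c helper, not its authoritative text):

#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/writeback.h>

/* Approximation of simple_fsync(); treat as a sketch. */
int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,	/* metadata-only */
	};
	struct inode *inode = dentry->d_inode;
	int err, ret;

	ret = sync_mapping_buffers(inode->i_mapping);
	if (!(inode->i_state & I_DIRTY))
		return ret;
	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
		return ret;

	err = sync_inode(inode, &wbc);
	if (ret == 0)
		ret = err;
	return ret;
}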
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index acf678831103..29ed682061f6 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -41,8 +41,6 @@ MODULE_AUTHOR("Remy Card and others"); | |||
41 | MODULE_DESCRIPTION("Second Extended Filesystem"); | 41 | MODULE_DESCRIPTION("Second Extended Filesystem"); |
42 | MODULE_LICENSE("GPL"); | 42 | MODULE_LICENSE("GPL"); |
43 | 43 | ||
44 | static int ext2_update_inode(struct inode * inode, int do_sync); | ||
45 | |||
46 | /* | 44 | /* |
47 | * Test whether an inode is a fast symlink. | 45 | * Test whether an inode is a fast symlink. |
48 | */ | 46 | */ |
@@ -66,7 +64,7 @@ void ext2_delete_inode (struct inode * inode) | |||
66 | goto no_delete; | 64 | goto no_delete; |
67 | EXT2_I(inode)->i_dtime = get_seconds(); | 65 | EXT2_I(inode)->i_dtime = get_seconds(); |
68 | mark_inode_dirty(inode); | 66 | mark_inode_dirty(inode); |
69 | ext2_update_inode(inode, inode_needs_sync(inode)); | 67 | ext2_write_inode(inode, inode_needs_sync(inode)); |
70 | 68 | ||
71 | inode->i_size = 0; | 69 | inode->i_size = 0; |
72 | if (inode->i_blocks) | 70 | if (inode->i_blocks) |
@@ -1337,7 +1335,7 @@ bad_inode: | |||
1337 | return ERR_PTR(ret); | 1335 | return ERR_PTR(ret); |
1338 | } | 1336 | } |
1339 | 1337 | ||
1340 | static int ext2_update_inode(struct inode * inode, int do_sync) | 1338 | int ext2_write_inode(struct inode *inode, int do_sync) |
1341 | { | 1339 | { |
1342 | struct ext2_inode_info *ei = EXT2_I(inode); | 1340 | struct ext2_inode_info *ei = EXT2_I(inode); |
1343 | struct super_block *sb = inode->i_sb; | 1341 | struct super_block *sb = inode->i_sb; |
@@ -1442,11 +1440,6 @@ static int ext2_update_inode(struct inode * inode, int do_sync) | |||
1442 | return err; | 1440 | return err; |
1443 | } | 1441 | } |
1444 | 1442 | ||
1445 | int ext2_write_inode(struct inode *inode, int wait) | ||
1446 | { | ||
1447 | return ext2_update_inode(inode, wait); | ||
1448 | } | ||
1449 | |||
1450 | int ext2_sync_inode(struct inode *inode) | 1443 | int ext2_sync_inode(struct inode *inode) |
1451 | { | 1444 | { |
1452 | struct writeback_control wbc = { | 1445 | struct writeback_control wbc = { |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 5c4afe652245..458999638c3d 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -42,6 +42,7 @@ static void ext2_sync_super(struct super_block *sb, | |||
42 | struct ext2_super_block *es); | 42 | struct ext2_super_block *es); |
43 | static int ext2_remount (struct super_block * sb, int * flags, char * data); | 43 | static int ext2_remount (struct super_block * sb, int * flags, char * data); |
44 | static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); | 44 | static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); |
45 | static int ext2_sync_fs(struct super_block *sb, int wait); | ||
45 | 46 | ||
46 | void ext2_error (struct super_block * sb, const char * function, | 47 | void ext2_error (struct super_block * sb, const char * function, |
47 | const char * fmt, ...) | 48 | const char * fmt, ...) |
@@ -114,6 +115,11 @@ static void ext2_put_super (struct super_block * sb) | |||
114 | int i; | 115 | int i; |
115 | struct ext2_sb_info *sbi = EXT2_SB(sb); | 116 | struct ext2_sb_info *sbi = EXT2_SB(sb); |
116 | 117 | ||
118 | lock_kernel(); | ||
119 | |||
120 | if (sb->s_dirt) | ||
121 | ext2_write_super(sb); | ||
122 | |||
117 | ext2_xattr_put_super(sb); | 123 | ext2_xattr_put_super(sb); |
118 | if (!(sb->s_flags & MS_RDONLY)) { | 124 | if (!(sb->s_flags & MS_RDONLY)) { |
119 | struct ext2_super_block *es = sbi->s_es; | 125 | struct ext2_super_block *es = sbi->s_es; |
@@ -135,7 +141,7 @@ static void ext2_put_super (struct super_block * sb) | |||
135 | kfree(sbi->s_blockgroup_lock); | 141 | kfree(sbi->s_blockgroup_lock); |
136 | kfree(sbi); | 142 | kfree(sbi); |
137 | 143 | ||
138 | return; | 144 | unlock_kernel(); |
139 | } | 145 | } |
140 | 146 | ||
141 | static struct kmem_cache * ext2_inode_cachep; | 147 | static struct kmem_cache * ext2_inode_cachep; |
@@ -304,6 +310,7 @@ static const struct super_operations ext2_sops = { | |||
304 | .delete_inode = ext2_delete_inode, | 310 | .delete_inode = ext2_delete_inode, |
305 | .put_super = ext2_put_super, | 311 | .put_super = ext2_put_super, |
306 | .write_super = ext2_write_super, | 312 | .write_super = ext2_write_super, |
313 | .sync_fs = ext2_sync_fs, | ||
307 | .statfs = ext2_statfs, | 314 | .statfs = ext2_statfs, |
308 | .remount_fs = ext2_remount, | 315 | .remount_fs = ext2_remount, |
309 | .clear_inode = ext2_clear_inode, | 316 | .clear_inode = ext2_clear_inode, |
@@ -1093,6 +1100,7 @@ failed_mount: | |||
1093 | brelse(bh); | 1100 | brelse(bh); |
1094 | failed_sbi: | 1101 | failed_sbi: |
1095 | sb->s_fs_info = NULL; | 1102 | sb->s_fs_info = NULL; |
1103 | kfree(sbi->s_blockgroup_lock); | ||
1096 | kfree(sbi); | 1104 | kfree(sbi); |
1097 | return ret; | 1105 | return ret; |
1098 | } | 1106 | } |
@@ -1126,25 +1134,36 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) | |||
1126 | * set s_state to EXT2_VALID_FS after some corrections. | 1134 | * set s_state to EXT2_VALID_FS after some corrections. |
1127 | */ | 1135 | */ |
1128 | 1136 | ||
1129 | void ext2_write_super (struct super_block * sb) | 1137 | static int ext2_sync_fs(struct super_block *sb, int wait) |
1130 | { | 1138 | { |
1131 | struct ext2_super_block * es; | 1139 | struct ext2_super_block *es = EXT2_SB(sb)->s_es; |
1140 | |||
1132 | lock_kernel(); | 1141 | lock_kernel(); |
1133 | if (!(sb->s_flags & MS_RDONLY)) { | 1142 | if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { |
1134 | es = EXT2_SB(sb)->s_es; | 1143 | ext2_debug("setting valid to 0\n"); |
1135 | 1144 | es->s_state &= cpu_to_le16(~EXT2_VALID_FS); | |
1136 | if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { | 1145 | es->s_free_blocks_count = |
1137 | ext2_debug ("setting valid to 0\n"); | 1146 | cpu_to_le32(ext2_count_free_blocks(sb)); |
1138 | es->s_state &= cpu_to_le16(~EXT2_VALID_FS); | 1147 | es->s_free_inodes_count = |
1139 | es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); | 1148 | cpu_to_le32(ext2_count_free_inodes(sb)); |
1140 | es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); | 1149 | es->s_mtime = cpu_to_le32(get_seconds()); |
1141 | es->s_mtime = cpu_to_le32(get_seconds()); | 1150 | ext2_sync_super(sb, es); |
1142 | ext2_sync_super(sb, es); | 1151 | } else { |
1143 | } else | 1152 | ext2_commit_super(sb, es); |
1144 | ext2_commit_super (sb, es); | ||
1145 | } | 1153 | } |
1146 | sb->s_dirt = 0; | 1154 | sb->s_dirt = 0; |
1147 | unlock_kernel(); | 1155 | unlock_kernel(); |
1156 | |||
1157 | return 0; | ||
1158 | } | ||
1159 | |||
1160 | |||
1161 | void ext2_write_super(struct super_block *sb) | ||
1162 | { | ||
1163 | if (!(sb->s_flags & MS_RDONLY)) | ||
1164 | ext2_sync_fs(sb, 1); | ||
1165 | else | ||
1166 | sb->s_dirt = 0; | ||
1148 | } | 1167 | } |
1149 | 1168 | ||
1150 | static int ext2_remount (struct super_block * sb, int * flags, char * data) | 1169 | static int ext2_remount (struct super_block * sb, int * flags, char * data) |
@@ -1156,6 +1175,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1156 | unsigned long old_sb_flags; | 1175 | unsigned long old_sb_flags; |
1157 | int err; | 1176 | int err; |
1158 | 1177 | ||
1178 | lock_kernel(); | ||
1179 | |||
1159 | /* Store the old options */ | 1180 | /* Store the old options */ |
1160 | old_sb_flags = sb->s_flags; | 1181 | old_sb_flags = sb->s_flags; |
1161 | old_opts.s_mount_opt = sbi->s_mount_opt; | 1182 | old_opts.s_mount_opt = sbi->s_mount_opt; |
@@ -1191,12 +1212,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1191 | sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; | 1212 | sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; |
1192 | sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; | 1213 | sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; |
1193 | } | 1214 | } |
1194 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 1215 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { |
1216 | unlock_kernel(); | ||
1195 | return 0; | 1217 | return 0; |
1218 | } | ||
1196 | if (*flags & MS_RDONLY) { | 1219 | if (*flags & MS_RDONLY) { |
1197 | if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || | 1220 | if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || |
1198 | !(sbi->s_mount_state & EXT2_VALID_FS)) | 1221 | !(sbi->s_mount_state & EXT2_VALID_FS)) { |
1222 | unlock_kernel(); | ||
1199 | return 0; | 1223 | return 0; |
1224 | } | ||
1200 | /* | 1225 | /* |
1201 | * OK, we are remounting a valid rw partition rdonly, so set | 1226 | * OK, we are remounting a valid rw partition rdonly, so set |
1202 | * the rdonly flag and then mark the partition as valid again. | 1227 | * the rdonly flag and then mark the partition as valid again. |
@@ -1223,12 +1248,14 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1223 | sb->s_flags &= ~MS_RDONLY; | 1248 | sb->s_flags &= ~MS_RDONLY; |
1224 | } | 1249 | } |
1225 | ext2_sync_super(sb, es); | 1250 | ext2_sync_super(sb, es); |
1251 | unlock_kernel(); | ||
1226 | return 0; | 1252 | return 0; |
1227 | restore_opts: | 1253 | restore_opts: |
1228 | sbi->s_mount_opt = old_opts.s_mount_opt; | 1254 | sbi->s_mount_opt = old_opts.s_mount_opt; |
1229 | sbi->s_resuid = old_opts.s_resuid; | 1255 | sbi->s_resuid = old_opts.s_resuid; |
1230 | sbi->s_resgid = old_opts.s_resgid; | 1256 | sbi->s_resgid = old_opts.s_resgid; |
1231 | sb->s_flags = old_sb_flags; | 1257 | sb->s_flags = old_sb_flags; |
1258 | unlock_kernel(); | ||
1232 | return err; | 1259 | return err; |
1233 | } | 1260 | } |
1234 | 1261 | ||
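The ext2_remount() hunks are purely lock discipline: with lock_kernel() now taken inside the filesystem, every exit path must drop it. Condensed to control flow, the rule being enforced looks like this (a sketch; "no_transition" is a placeholder for the flag comparison above, and the gotos reaching restore_opts are elided):

	static int ext2_remount(struct super_block *sb, int *flags, char *data)
	{
		int err;

		lock_kernel();			/* BKL pushed down from the VFS */
		/* ... save old options, parse new ones ... */
		if (no_transition) {		/* ro/rw state is unchanged */
			unlock_kernel();	/* early return drops the BKL */
			return 0;
		}
		/* ... perform the ro<->rw transition ... */
		unlock_kernel();
		return 0;
	restore_opts:
		/* ... roll back the saved options ... */
		unlock_kernel();		/* error path drops it too */
		return err;
	}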
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 225202db8974..27967f92e820 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -649,7 +649,7 @@ do_more: | |||
649 | count = overflow; | 649 | count = overflow; |
650 | goto do_more; | 650 | goto do_more; |
651 | } | 651 | } |
652 | sb->s_dirt = 1; | 652 | |
653 | error_return: | 653 | error_return: |
654 | brelse(bitmap_bh); | 654 | brelse(bitmap_bh); |
655 | ext3_std_error(sb, err); | 655 | ext3_std_error(sb, err); |
@@ -1708,7 +1708,6 @@ allocated: | |||
1708 | if (!fatal) | 1708 | if (!fatal) |
1709 | fatal = err; | 1709 | fatal = err; |
1710 | 1710 | ||
1711 | sb->s_dirt = 1; | ||
1712 | if (fatal) | 1711 | if (fatal) |
1713 | goto out; | 1712 | goto out; |
1714 | 1713 | ||
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index dd13d60d524b..b39991285136 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -181,7 +181,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) | |||
181 | err = ext3_journal_dirty_metadata(handle, bitmap_bh); | 181 | err = ext3_journal_dirty_metadata(handle, bitmap_bh); |
182 | if (!fatal) | 182 | if (!fatal) |
183 | fatal = err; | 183 | fatal = err; |
184 | sb->s_dirt = 1; | 184 | |
185 | error_return: | 185 | error_return: |
186 | brelse(bitmap_bh); | 186 | brelse(bitmap_bh); |
187 | ext3_std_error(sb, fatal); | 187 | ext3_std_error(sb, fatal); |
@@ -537,7 +537,6 @@ got: | |||
537 | percpu_counter_dec(&sbi->s_freeinodes_counter); | 537 | percpu_counter_dec(&sbi->s_freeinodes_counter); |
538 | if (S_ISDIR(mode)) | 538 | if (S_ISDIR(mode)) |
539 | percpu_counter_inc(&sbi->s_dirs_counter); | 539 | percpu_counter_inc(&sbi->s_dirs_counter); |
540 | sb->s_dirt = 1; | ||
541 | 540 | ||
542 | inode->i_uid = current_fsuid(); | 541 | inode->i_uid = current_fsuid(); |
543 | if (test_opt (sb, GRPID)) | 542 | if (test_opt (sb, GRPID)) |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index fcfa24361856..b0248c6d5d4c 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -2960,7 +2960,6 @@ static int ext3_do_update_inode(handle_t *handle, | |||
2960 | ext3_update_dynamic_rev(sb); | 2960 | ext3_update_dynamic_rev(sb); |
2961 | EXT3_SET_RO_COMPAT_FEATURE(sb, | 2961 | EXT3_SET_RO_COMPAT_FEATURE(sb, |
2962 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE); | 2962 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE); |
2963 | sb->s_dirt = 1; | ||
2964 | handle->h_sync = 1; | 2963 | handle->h_sync = 1; |
2965 | err = ext3_journal_dirty_metadata(handle, | 2964 | err = ext3_journal_dirty_metadata(handle, |
2966 | EXT3_SB(sb)->s_sbh); | 2965 | EXT3_SB(sb)->s_sbh); |
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 78fdf3836370..8a0b26340b54 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c | |||
@@ -934,7 +934,6 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) | |||
934 | EXT3_INODES_PER_GROUP(sb)); | 934 | EXT3_INODES_PER_GROUP(sb)); |
935 | 935 | ||
936 | ext3_journal_dirty_metadata(handle, sbi->s_sbh); | 936 | ext3_journal_dirty_metadata(handle, sbi->s_sbh); |
937 | sb->s_dirt = 1; | ||
938 | 937 | ||
939 | exit_journal: | 938 | exit_journal: |
940 | unlock_super(sb); | 939 | unlock_super(sb); |
@@ -1066,7 +1065,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, | |||
1066 | } | 1065 | } |
1067 | es->s_blocks_count = cpu_to_le32(o_blocks_count + add); | 1066 | es->s_blocks_count = cpu_to_le32(o_blocks_count + add); |
1068 | ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); | 1067 | ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); |
1069 | sb->s_dirt = 1; | ||
1070 | unlock_super(sb); | 1068 | unlock_super(sb); |
1071 | ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, | 1069 | ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, |
1072 | o_blocks_count + add); | 1070 | o_blocks_count + add); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 599dbfe504c3..26aa64dee6aa 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -67,7 +67,6 @@ static const char *ext3_decode_error(struct super_block * sb, int errno, | |||
67 | static int ext3_remount (struct super_block * sb, int * flags, char * data); | 67 | static int ext3_remount (struct super_block * sb, int * flags, char * data); |
68 | static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); | 68 | static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); |
69 | static int ext3_unfreeze(struct super_block *sb); | 69 | static int ext3_unfreeze(struct super_block *sb); |
70 | static void ext3_write_super (struct super_block * sb); | ||
71 | static int ext3_freeze(struct super_block *sb); | 70 | static int ext3_freeze(struct super_block *sb); |
72 | 71 | ||
73 | /* | 72 | /* |
@@ -399,6 +398,8 @@ static void ext3_put_super (struct super_block * sb) | |||
399 | struct ext3_super_block *es = sbi->s_es; | 398 | struct ext3_super_block *es = sbi->s_es; |
400 | int i, err; | 399 | int i, err; |
401 | 400 | ||
401 | lock_kernel(); | ||
402 | |||
402 | ext3_xattr_put_super(sb); | 403 | ext3_xattr_put_super(sb); |
403 | err = journal_destroy(sbi->s_journal); | 404 | err = journal_destroy(sbi->s_journal); |
404 | sbi->s_journal = NULL; | 405 | sbi->s_journal = NULL; |
@@ -447,7 +448,8 @@ static void ext3_put_super (struct super_block * sb) | |||
447 | sb->s_fs_info = NULL; | 448 | sb->s_fs_info = NULL; |
448 | kfree(sbi->s_blockgroup_lock); | 449 | kfree(sbi->s_blockgroup_lock); |
449 | kfree(sbi); | 450 | kfree(sbi); |
450 | return; | 451 | |
452 | unlock_kernel(); | ||
451 | } | 453 | } |
452 | 454 | ||
453 | static struct kmem_cache *ext3_inode_cachep; | 455 | static struct kmem_cache *ext3_inode_cachep; |
@@ -761,7 +763,6 @@ static const struct super_operations ext3_sops = { | |||
761 | .dirty_inode = ext3_dirty_inode, | 763 | .dirty_inode = ext3_dirty_inode, |
762 | .delete_inode = ext3_delete_inode, | 764 | .delete_inode = ext3_delete_inode, |
763 | .put_super = ext3_put_super, | 765 | .put_super = ext3_put_super, |
764 | .write_super = ext3_write_super, | ||
765 | .sync_fs = ext3_sync_fs, | 766 | .sync_fs = ext3_sync_fs, |
766 | .freeze_fs = ext3_freeze, | 767 | .freeze_fs = ext3_freeze, |
767 | .unfreeze_fs = ext3_unfreeze, | 768 | .unfreeze_fs = ext3_unfreeze, |
@@ -1696,7 +1697,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1696 | goto failed_mount; | 1697 | goto failed_mount; |
1697 | } | 1698 | } |
1698 | 1699 | ||
1699 | hblock = bdev_hardsect_size(sb->s_bdev); | 1700 | hblock = bdev_logical_block_size(sb->s_bdev); |
1700 | if (sb->s_blocksize != blocksize) { | 1701 | if (sb->s_blocksize != blocksize) { |
1701 | /* | 1702 | /* |
1702 | * Make sure the blocksize for the filesystem is larger | 1703 | * Make sure the blocksize for the filesystem is larger |
@@ -1785,7 +1786,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1785 | #else | 1786 | #else |
1786 | es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); | 1787 | es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); |
1787 | #endif | 1788 | #endif |
1788 | sb->s_dirt = 1; | ||
1789 | } | 1789 | } |
1790 | 1790 | ||
1791 | if (sbi->s_blocks_per_group > blocksize * 8) { | 1791 | if (sbi->s_blocks_per_group > blocksize * 8) { |
@@ -2021,6 +2021,7 @@ failed_mount: | |||
2021 | brelse(bh); | 2021 | brelse(bh); |
2022 | out_fail: | 2022 | out_fail: |
2023 | sb->s_fs_info = NULL; | 2023 | sb->s_fs_info = NULL; |
2024 | kfree(sbi->s_blockgroup_lock); | ||
2024 | kfree(sbi); | 2025 | kfree(sbi); |
2025 | lock_kernel(); | 2026 | lock_kernel(); |
2026 | return ret; | 2027 | return ret; |
@@ -2119,7 +2120,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, | |||
2119 | } | 2120 | } |
2120 | 2121 | ||
2121 | blocksize = sb->s_blocksize; | 2122 | blocksize = sb->s_blocksize; |
2122 | hblock = bdev_hardsect_size(bdev); | 2123 | hblock = bdev_logical_block_size(bdev); |
2123 | if (blocksize < hblock) { | 2124 | if (blocksize < hblock) { |
2124 | printk(KERN_ERR | 2125 | printk(KERN_ERR |
2125 | "EXT3-fs: blocksize too small for journal device.\n"); | 2126 | "EXT3-fs: blocksize too small for journal device.\n"); |
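Both bdev_hardsect_size() call sites above are mechanical fallout from a block-layer rename; the helper is now called bdev_logical_block_size(). The check itself is unchanged — a filesystem or journal block size smaller than the device's logical block size is unusable:

	hblock = bdev_logical_block_size(bdev);
	if (blocksize < hblock)		/* device sectors bigger than fs blocks */
		goto out_bdev;		/* give up on the journal device */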
@@ -2264,7 +2265,6 @@ static int ext3_load_journal(struct super_block *sb, | |||
2264 | if (journal_devnum && | 2265 | if (journal_devnum && |
2265 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 2266 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
2266 | es->s_journal_dev = cpu_to_le32(journal_devnum); | 2267 | es->s_journal_dev = cpu_to_le32(journal_devnum); |
2267 | sb->s_dirt = 1; | ||
2268 | 2268 | ||
2269 | /* Make sure we flush the recovery flag to disk. */ | 2269 | /* Make sure we flush the recovery flag to disk. */ |
2270 | ext3_commit_super(sb, es, 1); | 2270 | ext3_commit_super(sb, es, 1); |
@@ -2307,7 +2307,6 @@ static int ext3_create_journal(struct super_block * sb, | |||
2307 | EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); | 2307 | EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); |
2308 | 2308 | ||
2309 | es->s_journal_inum = cpu_to_le32(journal_inum); | 2309 | es->s_journal_inum = cpu_to_le32(journal_inum); |
2310 | sb->s_dirt = 1; | ||
2311 | 2310 | ||
2312 | /* Make sure we flush the recovery flag to disk. */ | 2311 | /* Make sure we flush the recovery flag to disk. */ |
2313 | ext3_commit_super(sb, es, 1); | 2312 | ext3_commit_super(sb, es, 1); |
@@ -2353,7 +2352,6 @@ static void ext3_mark_recovery_complete(struct super_block * sb, | |||
2353 | if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && | 2352 | if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && |
2354 | sb->s_flags & MS_RDONLY) { | 2353 | sb->s_flags & MS_RDONLY) { |
2355 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); | 2354 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); |
2356 | sb->s_dirt = 0; | ||
2357 | ext3_commit_super(sb, es, 1); | 2355 | ext3_commit_super(sb, es, 1); |
2358 | } | 2356 | } |
2359 | unlock_super(sb); | 2357 | unlock_super(sb); |
@@ -2412,29 +2410,14 @@ int ext3_force_commit(struct super_block *sb) | |||
2412 | return 0; | 2410 | return 0; |
2413 | 2411 | ||
2414 | journal = EXT3_SB(sb)->s_journal; | 2412 | journal = EXT3_SB(sb)->s_journal; |
2415 | sb->s_dirt = 0; | ||
2416 | ret = ext3_journal_force_commit(journal); | 2413 | ret = ext3_journal_force_commit(journal); |
2417 | return ret; | 2414 | return ret; |
2418 | } | 2415 | } |
2419 | 2416 | ||
2420 | /* | ||
2421 | * Ext3 always journals updates to the superblock itself, so we don't | ||
2422 | * have to propagate any other updates to the superblock on disk at this | ||
2423 | * point. (We can probably nuke this function altogether, and remove | ||
2424 | * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...) | ||
2425 | */ | ||
2426 | static void ext3_write_super (struct super_block * sb) | ||
2427 | { | ||
2428 | if (mutex_trylock(&sb->s_lock) != 0) | ||
2429 | BUG(); | ||
2430 | sb->s_dirt = 0; | ||
2431 | } | ||
2432 | |||
2433 | static int ext3_sync_fs(struct super_block *sb, int wait) | 2417 | static int ext3_sync_fs(struct super_block *sb, int wait) |
2434 | { | 2418 | { |
2435 | tid_t target; | 2419 | tid_t target; |
2436 | 2420 | ||
2437 | sb->s_dirt = 0; | ||
2438 | if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { | 2421 | if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { |
2439 | if (wait) | 2422 | if (wait) |
2440 | log_wait_commit(EXT3_SB(sb)->s_journal, target); | 2423 | log_wait_commit(EXT3_SB(sb)->s_journal, target); |
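The comment deleted above records the rationale for every sb->s_dirt removal in this series: ext3 journals superblock updates itself, so flagging the superblock dirty only provoked needless ->write_super calls, and ext3_write_super() can be dropped outright. A sketch of how a superblock field change reaches disk without s_dirt — ext3_update_sb_field() is an illustrative name, but the two journal wrappers are the real ones used throughout these hunks:

	/* Illustrative helper (not in the patch): journalled sb update. */
	static int ext3_update_sb_field(handle_t *handle, struct super_block *sb)
	{
		int err;

		err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
		if (err)
			return err;
		/* ... modify fields in EXT3_SB(sb)->s_es here ... */
		return ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
	}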
@@ -2450,7 +2433,6 @@ static int ext3_freeze(struct super_block *sb) | |||
2450 | { | 2433 | { |
2451 | int error = 0; | 2434 | int error = 0; |
2452 | journal_t *journal; | 2435 | journal_t *journal; |
2453 | sb->s_dirt = 0; | ||
2454 | 2436 | ||
2455 | if (!(sb->s_flags & MS_RDONLY)) { | 2437 | if (!(sb->s_flags & MS_RDONLY)) { |
2456 | journal = EXT3_SB(sb)->s_journal; | 2438 | journal = EXT3_SB(sb)->s_journal; |
@@ -2508,7 +2490,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2508 | int i; | 2490 | int i; |
2509 | #endif | 2491 | #endif |
2510 | 2492 | ||
2493 | lock_kernel(); | ||
2494 | |||
2511 | /* Store the original options */ | 2495 | /* Store the original options */ |
2496 | lock_super(sb); | ||
2512 | old_sb_flags = sb->s_flags; | 2497 | old_sb_flags = sb->s_flags; |
2513 | old_opts.s_mount_opt = sbi->s_mount_opt; | 2498 | old_opts.s_mount_opt = sbi->s_mount_opt; |
2514 | old_opts.s_resuid = sbi->s_resuid; | 2499 | old_opts.s_resuid = sbi->s_resuid; |
@@ -2616,6 +2601,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2616 | old_opts.s_qf_names[i] != sbi->s_qf_names[i]) | 2601 | old_opts.s_qf_names[i] != sbi->s_qf_names[i]) |
2617 | kfree(old_opts.s_qf_names[i]); | 2602 | kfree(old_opts.s_qf_names[i]); |
2618 | #endif | 2603 | #endif |
2604 | unlock_super(sb); | ||
2605 | unlock_kernel(); | ||
2619 | return 0; | 2606 | return 0; |
2620 | restore_opts: | 2607 | restore_opts: |
2621 | sb->s_flags = old_sb_flags; | 2608 | sb->s_flags = old_sb_flags; |
@@ -2632,6 +2619,8 @@ restore_opts: | |||
2632 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 2619 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
2633 | } | 2620 | } |
2634 | #endif | 2621 | #endif |
2622 | unlock_super(sb); | ||
2623 | unlock_kernel(); | ||
2635 | return err; | 2624 | return err; |
2636 | } | 2625 | } |
2637 | 2626 | ||
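Note the lock order the ext3_remount() hunks establish: lock_kernel() is taken before lock_super(), and both are released in reverse order on the success path and on restore_opts alike. Skeleton of the resulting structure (bodies and the gotos into restore_opts elided):

	static int ext3_remount(struct super_block *sb, int *flags, char *data)
	{
		int err = 0;

		lock_kernel();
		lock_super(sb);		/* guards the saved option block */
		/* ... option parsing, ro/rw and journal transitions ... */
		unlock_super(sb);
		unlock_kernel();
		return 0;
	restore_opts:
		/* ... restore the old options ... */
		unlock_super(sb);
		unlock_kernel();
		return err;
	}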
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 83b7be849bd5..545e37c4b91e 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c | |||
@@ -463,7 +463,6 @@ static void ext3_xattr_update_super_block(handle_t *handle, | |||
463 | 463 | ||
464 | if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { | 464 | if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { |
465 | EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR); | 465 | EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR); |
466 | sb->s_dirt = 1; | ||
467 | ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); | 466 | ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); |
468 | } | 467 | } |
469 | } | 468 | } |
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index a8ff003a00f7..8a34710ecf40 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -5,8 +5,8 @@ | |||
5 | obj-$(CONFIG_EXT4_FS) += ext4.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o |
10 | 10 | ||
11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o | 12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 53c72ad85877..e2126d70dff5 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/buffer_head.h> | 19 | #include <linux/buffer_head.h> |
20 | #include "ext4.h" | 20 | #include "ext4.h" |
21 | #include "ext4_jbd2.h" | 21 | #include "ext4_jbd2.h" |
22 | #include "group.h" | ||
23 | #include "mballoc.h" | 22 | #include "mballoc.h" |
24 | 23 | ||
25 | /* | 24 | /* |
@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
88 | ext4_group_t block_group, struct ext4_group_desc *gdp) | 87 | ext4_group_t block_group, struct ext4_group_desc *gdp) |
89 | { | 88 | { |
90 | int bit, bit_max; | 89 | int bit, bit_max; |
90 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
91 | unsigned free_blocks, group_blocks; | 91 | unsigned free_blocks, group_blocks; |
92 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 92 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
93 | 93 | ||
@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
123 | bit_max += ext4_bg_num_gdb(sb, block_group); | 123 | bit_max += ext4_bg_num_gdb(sb, block_group); |
124 | } | 124 | } |
125 | 125 | ||
126 | if (block_group == sbi->s_groups_count - 1) { | 126 | if (block_group == ngroups - 1) { |
127 | /* | 127 | /* |
128 | * Even though mke2fs always initialize first and last group | 128 | * Even though mke2fs always initialize first and last group |
129 | * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need | 129 | * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need |
@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
131 | */ | 131 | */ |
132 | group_blocks = ext4_blocks_count(sbi->s_es) - | 132 | group_blocks = ext4_blocks_count(sbi->s_es) - |
133 | le32_to_cpu(sbi->s_es->s_first_data_block) - | 133 | le32_to_cpu(sbi->s_es->s_first_data_block) - |
134 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); | 134 | (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1)); |
135 | } else { | 135 | } else { |
136 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); | 136 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); |
137 | } | 137 | } |
@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, | |||
205 | { | 205 | { |
206 | unsigned int group_desc; | 206 | unsigned int group_desc; |
207 | unsigned int offset; | 207 | unsigned int offset; |
208 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
208 | struct ext4_group_desc *desc; | 209 | struct ext4_group_desc *desc; |
209 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 210 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
210 | 211 | ||
211 | if (block_group >= sbi->s_groups_count) { | 212 | if (block_group >= ngroups) { |
212 | ext4_error(sb, "ext4_get_group_desc", | 213 | ext4_error(sb, "ext4_get_group_desc", |
213 | "block_group >= groups_count - " | 214 | "block_group >= groups_count - " |
214 | "block_group = %u, groups_count = %u", | 215 | "block_group = %u, groups_count = %u", |
215 | block_group, sbi->s_groups_count); | 216 | block_group, ngroups); |
216 | 217 | ||
217 | return NULL; | 218 | return NULL; |
218 | } | 219 | } |
219 | smp_rmb(); | ||
220 | 220 | ||
221 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); | 221 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); |
222 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 222 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
326 | unlock_buffer(bh); | 326 | unlock_buffer(bh); |
327 | return bh; | 327 | return bh; |
328 | } | 328 | } |
329 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 329 | ext4_lock_group(sb, block_group); |
330 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 330 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
331 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 331 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
332 | set_bitmap_uptodate(bh); | 332 | set_bitmap_uptodate(bh); |
333 | set_buffer_uptodate(bh); | 333 | set_buffer_uptodate(bh); |
334 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 334 | ext4_unlock_group(sb, block_group); |
335 | unlock_buffer(bh); | 335 | unlock_buffer(bh); |
336 | return bh; | 336 | return bh; |
337 | } | 337 | } |
338 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 338 | ext4_unlock_group(sb, block_group); |
339 | if (buffer_uptodate(bh)) { | 339 | if (buffer_uptodate(bh)) { |
340 | /* | 340 | /* |
341 | * if not uninit if bh is uptodate, | 341 | * if not uninit if bh is uptodate, |
@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
451 | down_write(&grp->alloc_sem); | 451 | down_write(&grp->alloc_sem); |
452 | for (i = 0, blocks_freed = 0; i < count; i++) { | 452 | for (i = 0, blocks_freed = 0; i < count; i++) { |
453 | BUFFER_TRACE(bitmap_bh, "clear bit"); | 453 | BUFFER_TRACE(bitmap_bh, "clear bit"); |
454 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), | 454 | if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), |
455 | bit + i, bitmap_bh->b_data)) { | 455 | bit + i, bitmap_bh->b_data)) { |
456 | ext4_error(sb, __func__, | 456 | ext4_error(sb, __func__, |
457 | "bit already cleared for block %llu", | 457 | "bit already cleared for block %llu", |
@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
461 | blocks_freed++; | 461 | blocks_freed++; |
462 | } | 462 | } |
463 | } | 463 | } |
464 | spin_lock(sb_bgl_lock(sbi, block_group)); | 464 | ext4_lock_group(sb, block_group); |
465 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | 465 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); |
466 | ext4_free_blks_set(sb, desc, blk_free_count); | 466 | ext4_free_blks_set(sb, desc, blk_free_count); |
467 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | 467 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); |
468 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 468 | ext4_unlock_group(sb, block_group); |
469 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | 469 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); |
470 | 470 | ||
471 | if (sbi->s_log_groups_per_flex) { | 471 | if (sbi->s_log_groups_per_flex) { |
@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
665 | ext4_fsblk_t desc_count; | 665 | ext4_fsblk_t desc_count; |
666 | struct ext4_group_desc *gdp; | 666 | struct ext4_group_desc *gdp; |
667 | ext4_group_t i; | 667 | ext4_group_t i; |
668 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 668 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
669 | #ifdef EXT4FS_DEBUG | 669 | #ifdef EXT4FS_DEBUG |
670 | struct ext4_super_block *es; | 670 | struct ext4_super_block *es; |
671 | ext4_fsblk_t bitmap_count; | 671 | ext4_fsblk_t bitmap_count; |
@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
677 | bitmap_count = 0; | 677 | bitmap_count = 0; |
678 | gdp = NULL; | 678 | gdp = NULL; |
679 | 679 | ||
680 | smp_rmb(); | ||
681 | for (i = 0; i < ngroups; i++) { | 680 | for (i = 0; i < ngroups; i++) { |
682 | gdp = ext4_get_group_desc(sb, i, NULL); | 681 | gdp = ext4_get_group_desc(sb, i, NULL); |
683 | if (!gdp) | 682 | if (!gdp) |
@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
700 | return bitmap_count; | 699 | return bitmap_count; |
701 | #else | 700 | #else |
702 | desc_count = 0; | 701 | desc_count = 0; |
703 | smp_rmb(); | ||
704 | for (i = 0; i < ngroups; i++) { | 702 | for (i = 0; i < ngroups; i++) { |
705 | gdp = ext4_get_group_desc(sb, i, NULL); | 703 | gdp = ext4_get_group_desc(sb, i, NULL); |
706 | if (!gdp) | 704 | if (!gdp) |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c new file mode 100644 index 000000000000..50784ef07563 --- /dev/null +++ b/fs/ext4/block_validity.c | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/block_validity.c | ||
3 | * | ||
4 | * Copyright (C) 2009 | ||
5 | * Theodore Ts'o (tytso@mit.edu) | ||
6 | * | ||
7 | * Track which blocks in the filesystem are metadata blocks that | ||
8 | * should never be used as data blocks by files or directories. | ||
9 | */ | ||
10 | |||
11 | #include <linux/time.h> | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/namei.h> | ||
14 | #include <linux/quotaops.h> | ||
15 | #include <linux/buffer_head.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/swap.h> | ||
18 | #include <linux/pagemap.h> | ||
19 | #include <linux/version.h> | ||
20 | #include <linux/blkdev.h> | ||
21 | #include <linux/mutex.h> | ||
22 | #include "ext4.h" | ||
23 | |||
24 | struct ext4_system_zone { | ||
25 | struct rb_node node; | ||
26 | ext4_fsblk_t start_blk; | ||
27 | unsigned int count; | ||
28 | }; | ||
29 | |||
30 | static struct kmem_cache *ext4_system_zone_cachep; | ||
31 | |||
32 | int __init init_ext4_system_zone(void) | ||
33 | { | ||
34 | ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, | ||
35 | SLAB_RECLAIM_ACCOUNT); | ||
36 | if (ext4_system_zone_cachep == NULL) | ||
37 | return -ENOMEM; | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | void exit_ext4_system_zone(void) | ||
42 | { | ||
43 | kmem_cache_destroy(ext4_system_zone_cachep); | ||
44 | } | ||
45 | |||
46 | static inline int can_merge(struct ext4_system_zone *entry1, | ||
47 | struct ext4_system_zone *entry2) | ||
48 | { | ||
49 | if ((entry1->start_blk + entry1->count) == entry2->start_blk) | ||
50 | return 1; | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * Mark a range of blocks as belonging to the "system zone" --- that | ||
56 | * is, filesystem metadata blocks which should never be used by | ||
57 | * inodes. | ||
58 | */ | ||
59 | static int add_system_zone(struct ext4_sb_info *sbi, | ||
60 | ext4_fsblk_t start_blk, | ||
61 | unsigned int count) | ||
62 | { | ||
63 | struct ext4_system_zone *new_entry = NULL, *entry; | ||
64 | struct rb_node **n = &sbi->system_blks.rb_node, *node; | ||
65 | struct rb_node *parent = NULL, *new_node = NULL; | ||
66 | |||
67 | while (*n) { | ||
68 | parent = *n; | ||
69 | entry = rb_entry(parent, struct ext4_system_zone, node); | ||
70 | if (start_blk < entry->start_blk) | ||
71 | n = &(*n)->rb_left; | ||
72 | else if (start_blk >= (entry->start_blk + entry->count)) | ||
73 | n = &(*n)->rb_right; | ||
74 | else { | ||
75 | if (start_blk + count > (entry->start_blk + | ||
76 | entry->count)) | ||
77 | entry->count = (start_blk + count - | ||
78 | entry->start_blk); | ||
79 | new_node = *n; | ||
80 | new_entry = rb_entry(new_node, struct ext4_system_zone, | ||
81 | node); | ||
82 | break; | ||
83 | } | ||
84 | } | ||
85 | |||
86 | if (!new_entry) { | ||
87 | new_entry = kmem_cache_alloc(ext4_system_zone_cachep, | ||
88 | GFP_KERNEL); | ||
89 | if (!new_entry) | ||
90 | return -ENOMEM; | ||
91 | new_entry->start_blk = start_blk; | ||
92 | new_entry->count = count; | ||
93 | new_node = &new_entry->node; | ||
94 | |||
95 | rb_link_node(new_node, parent, n); | ||
96 | rb_insert_color(new_node, &sbi->system_blks); | ||
97 | } | ||
98 | |||
99 | /* Can we merge to the left? */ | ||
100 | node = rb_prev(new_node); | ||
101 | if (node) { | ||
102 | entry = rb_entry(node, struct ext4_system_zone, node); | ||
103 | if (can_merge(entry, new_entry)) { | ||
104 | new_entry->start_blk = entry->start_blk; | ||
105 | new_entry->count += entry->count; | ||
106 | rb_erase(node, &sbi->system_blks); | ||
107 | kmem_cache_free(ext4_system_zone_cachep, entry); | ||
108 | } | ||
109 | } | ||
110 | |||
111 | /* Can we merge to the right? */ | ||
112 | node = rb_next(new_node); | ||
113 | if (node) { | ||
114 | entry = rb_entry(node, struct ext4_system_zone, node); | ||
115 | if (can_merge(new_entry, entry)) { | ||
116 | new_entry->count += entry->count; | ||
117 | rb_erase(node, &sbi->system_blks); | ||
118 | kmem_cache_free(ext4_system_zone_cachep, entry); | ||
119 | } | ||
120 | } | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | static void debug_print_tree(struct ext4_sb_info *sbi) | ||
125 | { | ||
126 | struct rb_node *node; | ||
127 | struct ext4_system_zone *entry; | ||
128 | int first = 1; | ||
129 | |||
130 | printk(KERN_INFO "System zones: "); | ||
131 | node = rb_first(&sbi->system_blks); | ||
132 | while (node) { | ||
133 | entry = rb_entry(node, struct ext4_system_zone, node); | ||
134 | printk("%s%llu-%llu", first ? "" : ", ", | ||
135 | entry->start_blk, entry->start_blk + entry->count - 1); | ||
136 | first = 0; | ||
137 | node = rb_next(node); | ||
138 | } | ||
139 | printk("\n"); | ||
140 | } | ||
141 | |||
142 | int ext4_setup_system_zone(struct super_block *sb) | ||
143 | { | ||
144 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
145 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
146 | struct ext4_group_desc *gdp; | ||
147 | ext4_group_t i; | ||
148 | int flex_size = ext4_flex_bg_size(sbi); | ||
149 | int ret; | ||
150 | |||
151 | if (!test_opt(sb, BLOCK_VALIDITY)) { | ||
152 | if (EXT4_SB(sb)->system_blks.rb_node) | ||
153 | ext4_release_system_zone(sb); | ||
154 | return 0; | ||
155 | } | ||
156 | if (EXT4_SB(sb)->system_blks.rb_node) | ||
157 | return 0; | ||
158 | |||
159 | for (i=0; i < ngroups; i++) { | ||
160 | if (ext4_bg_has_super(sb, i) && | ||
161 | ((i < 5) || ((i % flex_size) == 0))) | ||
162 | add_system_zone(sbi, ext4_group_first_block_no(sb, i), | ||
163 | sbi->s_gdb_count + 1); | ||
164 | gdp = ext4_get_group_desc(sb, i, NULL); | ||
165 | ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); | ||
166 | if (ret) | ||
167 | return ret; | ||
168 | ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1); | ||
169 | if (ret) | ||
170 | return ret; | ||
171 | ret = add_system_zone(sbi, ext4_inode_table(sb, gdp), | ||
172 | sbi->s_itb_per_group); | ||
173 | if (ret) | ||
174 | return ret; | ||
175 | } | ||
176 | |||
177 | if (test_opt(sb, DEBUG)) | ||
178 | debug_print_tree(EXT4_SB(sb)); | ||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | /* Called when the filesystem is unmounted */ | ||
183 | void ext4_release_system_zone(struct super_block *sb) | ||
184 | { | ||
185 | struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node; | ||
186 | struct rb_node *parent; | ||
187 | struct ext4_system_zone *entry; | ||
188 | |||
189 | while (n) { | ||
190 | /* Do the node's children first */ | ||
191 | if (n->rb_left) { | ||
192 | n = n->rb_left; | ||
193 | continue; | ||
194 | } | ||
195 | if (n->rb_right) { | ||
196 | n = n->rb_right; | ||
197 | continue; | ||
198 | } | ||
199 | /* | ||
200 | * The node has no children; free it, and then zero | ||
201 | * out parent's link to it. Finally go to the | ||
202 | * beginning of the loop and try to free the parent | ||
203 | * node. | ||
204 | */ | ||
205 | parent = rb_parent(n); | ||
206 | entry = rb_entry(n, struct ext4_system_zone, node); | ||
207 | kmem_cache_free(ext4_system_zone_cachep, entry); | ||
208 | if (!parent) | ||
209 | EXT4_SB(sb)->system_blks.rb_node = NULL; | ||
210 | else if (parent->rb_left == n) | ||
211 | parent->rb_left = NULL; | ||
212 | else if (parent->rb_right == n) | ||
213 | parent->rb_right = NULL; | ||
214 | n = parent; | ||
215 | } | ||
216 | EXT4_SB(sb)->system_blks.rb_node = NULL; | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Returns 1 if the passed-in block region (start_blk, | ||
221 | * start_blk+count) is valid; 0 if some part of the block region | ||
222 | * overlaps with filesystem metadata blocks. | ||
223 | */ | ||
224 | int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, | ||
225 | unsigned int count) | ||
226 | { | ||
227 | struct ext4_system_zone *entry; | ||
228 | struct rb_node *n = sbi->system_blks.rb_node; | ||
229 | |||
230 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || | ||
231 | (start_blk + count > ext4_blocks_count(sbi->s_es))) | ||
232 | return 0; | ||
233 | while (n) { | ||
234 | entry = rb_entry(n, struct ext4_system_zone, node); | ||
235 | if (start_blk + count - 1 < entry->start_blk) | ||
236 | n = n->rb_left; | ||
237 | else if (start_blk >= (entry->start_blk + entry->count)) | ||
238 | n = n->rb_right; | ||
239 | else | ||
240 | return 0; | ||
241 | } | ||
242 | return 1; | ||
243 | } | ||
244 | |||
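How the new file is meant to be used: ext4_setup_system_zone() builds the tree at mount time, and the block-mapping paths consult ext4_data_block_valid() before trusting a range read from disk. A hedged sketch of a caller — check_mapped_range() is illustrative, not a function from this series:

	/* Illustrative: refuse to map an extent that overlaps metadata. */
	static int check_mapped_range(struct super_block *sb,
				      ext4_fsblk_t start, unsigned int len)
	{
		if (!ext4_data_block_valid(EXT4_SB(sb), start, len)) {
			ext4_error(sb, __func__, "bad extent %llu..%llu: "
				   "overlaps system zone", start,
				   start + len - 1);
			return -EIO;
		}
		return 0;
	}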
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index b64789929a65..9dc93168e262 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp, | |||
131 | struct buffer_head *bh = NULL; | 131 | struct buffer_head *bh = NULL; |
132 | 132 | ||
133 | map_bh.b_state = 0; | 133 | map_bh.b_state = 0; |
134 | err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, | 134 | err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0); |
135 | 0, 0, 0); | ||
136 | if (err > 0) { | 135 | if (err > 0) { |
137 | pgoff_t index = map_bh.b_blocknr >> | 136 | pgoff_t index = map_bh.b_blocknr >> |
138 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 137 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d0f15ef56de1..cc7d5edc38c9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -21,7 +21,14 @@ | |||
21 | #include <linux/magic.h> | 21 | #include <linux/magic.h> |
22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
23 | #include <linux/quota.h> | 23 | #include <linux/quota.h> |
24 | #include "ext4_i.h" | 24 | #include <linux/rwsem.h> |
25 | #include <linux/rbtree.h> | ||
26 | #include <linux/seqlock.h> | ||
27 | #include <linux/mutex.h> | ||
28 | #include <linux/timer.h> | ||
29 | #include <linux/wait.h> | ||
30 | #include <linux/blockgroup_lock.h> | ||
31 | #include <linux/percpu_counter.h> | ||
25 | 32 | ||
26 | /* | 33 | /* |
27 | * The fourth extended filesystem constants/structures | 34 | * The fourth extended filesystem constants/structures |
@@ -46,6 +53,19 @@ | |||
46 | #define ext4_debug(f, a...) do {} while (0) | 53 | #define ext4_debug(f, a...) do {} while (0) |
47 | #endif | 54 | #endif |
48 | 55 | ||
56 | /* data type for block offset of block group */ | ||
57 | typedef int ext4_grpblk_t; | ||
58 | |||
59 | /* data type for filesystem-wide blocks number */ | ||
60 | typedef unsigned long long ext4_fsblk_t; | ||
61 | |||
62 | /* data type for file logical block number */ | ||
63 | typedef __u32 ext4_lblk_t; | ||
64 | |||
65 | /* data type for block group number */ | ||
66 | typedef unsigned int ext4_group_t; | ||
67 | |||
68 | |||
49 | /* prefer goal again. length */ | 69 | /* prefer goal again. length */ |
50 | #define EXT4_MB_HINT_MERGE 1 | 70 | #define EXT4_MB_HINT_MERGE 1 |
51 | /* blocks already reserved */ | 71 | /* blocks already reserved */ |
@@ -179,9 +199,6 @@ struct flex_groups { | |||
179 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ | 199 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ |
180 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ | 200 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ |
181 | 201 | ||
182 | #ifdef __KERNEL__ | ||
183 | #include "ext4_sb.h" | ||
184 | #endif | ||
185 | /* | 202 | /* |
186 | * Macro-instructions used to manage group descriptors | 203 | * Macro-instructions used to manage group descriptors |
187 | */ | 204 | */ |
@@ -297,10 +314,23 @@ struct ext4_new_group_data { | |||
297 | }; | 314 | }; |
298 | 315 | ||
299 | /* | 316 | /* |
300 | * Following is used by preallocation code to tell get_blocks() that we | 317 | * Flags used by ext4_get_blocks() |
301 | * want uninitialzed extents. | ||
302 | */ | 318 | */ |
303 | #define EXT4_CREATE_UNINITIALIZED_EXT 2 | 319 | /* Allocate any needed blocks and/or convert an uninitialized
320 | extent to be an initialized extent */ | ||
321 | #define EXT4_GET_BLOCKS_CREATE 0x0001 | ||
322 | /* Request the creation of an uninitialized extent */ | ||
323 | #define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002 | ||
324 | #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ | ||
325 | EXT4_GET_BLOCKS_CREATE) | ||
326 | /* Caller is from the delayed allocation writeout path, | ||
327 | so set the magic i_delalloc_reserve_flag after taking the | ||
328 | inode allocation semaphore */ | ||
329 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 | ||
330 | /* Call ext4_da_update_reserve_space() after successfully | ||
331 | allocating the blocks */ | ||
332 | #define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 | ||
333 | |||
304 | 334 | ||
305 | /* | 335 | /* |
306 | * ioctl commands | 336 | * ioctl commands |
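These flags fold what used to be three separate int arguments (create, extend_disksize, flag) into one bitmask; read-only lookups simply pass 0, as the dir.c hunk above shows. An illustrative call-site conversion (variable names assumed from a typical caller):

	/*
	 * Before:  ext4_get_blocks_wrap(handle, inode, block, max_blocks,
	 *                               bh, create, extend_disksize, flag);
	 * After: one flags word, e.g. preallocation requesting an
	 * uninitialized extent in a single call:
	 */
	err = ext4_get_blocks(handle, inode, block, max_blocks, bh,
			      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);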
@@ -516,6 +546,110 @@ do { \ | |||
516 | #endif /* defined(__KERNEL__) || defined(__linux__) */ | 546 | #endif /* defined(__KERNEL__) || defined(__linux__) */ |
517 | 547 | ||
518 | /* | 548 | /* |
549 | * storage for cached extent | ||
550 | */ | ||
551 | struct ext4_ext_cache { | ||
552 | ext4_fsblk_t ec_start; | ||
553 | ext4_lblk_t ec_block; | ||
554 | __u32 ec_len; /* must be 32bit to return holes */ | ||
555 | __u32 ec_type; | ||
556 | }; | ||
557 | |||
558 | /* | ||
559 | * fourth extended file system inode data in memory | ||
560 | */ | ||
561 | struct ext4_inode_info { | ||
562 | __le32 i_data[15]; /* unconverted */ | ||
563 | __u32 i_flags; | ||
564 | ext4_fsblk_t i_file_acl; | ||
565 | __u32 i_dtime; | ||
566 | |||
567 | /* | ||
568 | * i_block_group is the number of the block group which contains | ||
569 | * this file's inode. Constant across the lifetime of the inode, | ||
570 | * it is used for making block allocation decisions - we try to | ||
571 | * place a file's data blocks near its inode block, and new inodes | ||
572 | * near to their parent directory's inode. | ||
573 | */ | ||
574 | ext4_group_t i_block_group; | ||
575 | __u32 i_state; /* Dynamic state flags for ext4 */ | ||
576 | |||
577 | ext4_lblk_t i_dir_start_lookup; | ||
578 | #ifdef CONFIG_EXT4_FS_XATTR | ||
579 | /* | ||
580 | * Extended attributes can be read independently of the main file | ||
581 | * data. Taking i_mutex even when reading would cause contention | ||
582 | * between readers of EAs and writers of regular file data, so | ||
583 | * instead we synchronize on xattr_sem when reading or changing | ||
584 | * EAs. | ||
585 | */ | ||
586 | struct rw_semaphore xattr_sem; | ||
587 | #endif | ||
588 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | ||
589 | struct posix_acl *i_acl; | ||
590 | struct posix_acl *i_default_acl; | ||
591 | #endif | ||
592 | |||
593 | struct list_head i_orphan; /* unlinked but open inodes */ | ||
594 | |||
595 | /* | ||
596 | * i_disksize keeps track of what the inode size is ON DISK, not | ||
597 | * in memory. During truncate, i_size is set to the new size by | ||
598 | * the VFS prior to calling ext4_truncate(), but the filesystem won't | ||
599 | * set i_disksize to 0 until the truncate is actually under way. | ||
600 | * | ||
601 | * The intent is that i_disksize always represents the blocks which | ||
602 | * are used by this file. This allows recovery to restart truncate | ||
603 | * on orphans if we crash during truncate. We actually write i_disksize | ||
604 | * into the on-disk inode when writing inodes out, instead of i_size. | ||
605 | * | ||
606 | * The only time when i_disksize and i_size may be different is when | ||
607 | * a truncate is in progress. The only things which change i_disksize | ||
608 | * are ext4_get_block (growth) and ext4_truncate (shrink). | ||
609 | */ | ||
610 | loff_t i_disksize; | ||
611 | |||
612 | /* | ||
613 | * i_data_sem is for serialising ext4_truncate() against | ||
614 | * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's | ||
615 | * data tree are chopped off during truncate. We can't do that in | ||
616 | * ext4 because whenever we perform intermediate commits during | ||
617 | * truncate, the inode and all the metadata blocks *must* be in a | ||
618 | * consistent state which allows truncation of the orphans to restart | ||
619 | * during recovery. Hence we must fix the get_block-vs-truncate race | ||
620 | * by other means, so we have i_data_sem. | ||
621 | */ | ||
622 | struct rw_semaphore i_data_sem; | ||
623 | struct inode vfs_inode; | ||
624 | struct jbd2_inode jinode; | ||
625 | |||
626 | struct ext4_ext_cache i_cached_extent; | ||
627 | /* | ||
628 | * File creation time. Its function is same as that of | ||
629 | * struct timespec i_{a,c,m}time in the generic inode. | ||
630 | */ | ||
631 | struct timespec i_crtime; | ||
632 | |||
633 | /* mballoc */ | ||
634 | struct list_head i_prealloc_list; | ||
635 | spinlock_t i_prealloc_lock; | ||
636 | |||
637 | /* ialloc */ | ||
638 | ext4_group_t i_last_alloc_group; | ||
639 | |||
640 | /* allocation reservation info for delalloc */ | ||
641 | unsigned int i_reserved_data_blocks; | ||
642 | unsigned int i_reserved_meta_blocks; | ||
643 | unsigned int i_allocated_meta_blocks; | ||
644 | unsigned short i_delalloc_reserved_flag; | ||
645 | |||
646 | /* on-disk additional length */ | ||
647 | __u16 i_extra_isize; | ||
648 | |||
649 | spinlock_t i_block_reservation_lock; | ||
650 | }; | ||
651 | |||
652 | /* | ||
519 | * File system states | 653 | * File system states |
520 | */ | 654 | */ |
521 | #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ | 655 | #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ |
@@ -560,6 +694,7 @@ do { \ | |||
560 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 694 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
561 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 695 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
562 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 696 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
697 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | ||
563 | 698 | ||
564 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 699 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
565 | #ifndef _LINUX_EXT2_FS_H | 700 | #ifndef _LINUX_EXT2_FS_H |
@@ -689,6 +824,137 @@ struct ext4_super_block { | |||
689 | }; | 824 | }; |
690 | 825 | ||
691 | #ifdef __KERNEL__ | 826 | #ifdef __KERNEL__ |
827 | /* | ||
828 | * fourth extended-fs super-block data in memory | ||
829 | */ | ||
830 | struct ext4_sb_info { | ||
831 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ | ||
832 | unsigned long s_inodes_per_block;/* Number of inodes per block */ | ||
833 | unsigned long s_blocks_per_group;/* Number of blocks in a group */ | ||
834 | unsigned long s_inodes_per_group;/* Number of inodes in a group */ | ||
835 | unsigned long s_itb_per_group; /* Number of inode table blocks per group */ | ||
836 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | ||
837 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | ||
838 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | ||
839 | unsigned long s_overhead_last; /* Last calculated overhead */ | ||
840 | unsigned long s_blocks_last; /* Last seen block count */ | ||
841 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | ||
842 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | ||
843 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ | ||
844 | struct buffer_head **s_group_desc; | ||
845 | unsigned long s_mount_opt; | ||
846 | ext4_fsblk_t s_sb_block; | ||
847 | uid_t s_resuid; | ||
848 | gid_t s_resgid; | ||
849 | unsigned short s_mount_state; | ||
850 | unsigned short s_pad; | ||
851 | int s_addr_per_block_bits; | ||
852 | int s_desc_per_block_bits; | ||
853 | int s_inode_size; | ||
854 | int s_first_ino; | ||
855 | unsigned int s_inode_readahead_blks; | ||
856 | spinlock_t s_next_gen_lock; | ||
857 | u32 s_next_generation; | ||
858 | u32 s_hash_seed[4]; | ||
859 | int s_def_hash_version; | ||
860 | int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ | ||
861 | struct percpu_counter s_freeblocks_counter; | ||
862 | struct percpu_counter s_freeinodes_counter; | ||
863 | struct percpu_counter s_dirs_counter; | ||
864 | struct percpu_counter s_dirtyblocks_counter; | ||
865 | struct blockgroup_lock *s_blockgroup_lock; | ||
866 | struct proc_dir_entry *s_proc; | ||
867 | struct kobject s_kobj; | ||
868 | struct completion s_kobj_unregister; | ||
869 | |||
870 | /* Journaling */ | ||
871 | struct inode *s_journal_inode; | ||
872 | struct journal_s *s_journal; | ||
873 | struct list_head s_orphan; | ||
874 | struct mutex s_orphan_lock; | ||
875 | struct mutex s_resize_lock; | ||
876 | unsigned long s_commit_interval; | ||
877 | u32 s_max_batch_time; | ||
878 | u32 s_min_batch_time; | ||
879 | struct block_device *journal_bdev; | ||
880 | #ifdef CONFIG_JBD2_DEBUG | ||
881 | struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ | ||
882 | wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ | ||
883 | #endif | ||
884 | #ifdef CONFIG_QUOTA | ||
885 | char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ | ||
886 | int s_jquota_fmt; /* Format of quota to use */ | ||
887 | #endif | ||
888 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ | ||
889 | struct rb_root system_blks; | ||
890 | |||
891 | #ifdef EXTENTS_STATS | ||
892 | /* ext4 extents stats */ | ||
893 | unsigned long s_ext_min; | ||
894 | unsigned long s_ext_max; | ||
895 | unsigned long s_depth_max; | ||
896 | spinlock_t s_ext_stats_lock; | ||
897 | unsigned long s_ext_blocks; | ||
898 | unsigned long s_ext_extents; | ||
899 | #endif | ||
900 | |||
901 | /* for buddy allocator */ | ||
902 | struct ext4_group_info ***s_group_info; | ||
903 | struct inode *s_buddy_cache; | ||
904 | long s_blocks_reserved; | ||
905 | spinlock_t s_reserve_lock; | ||
906 | spinlock_t s_md_lock; | ||
907 | tid_t s_last_transaction; | ||
908 | unsigned short *s_mb_offsets; | ||
909 | unsigned int *s_mb_maxs; | ||
910 | |||
911 | /* tunables */ | ||
912 | unsigned long s_stripe; | ||
913 | unsigned int s_mb_stream_request; | ||
914 | unsigned int s_mb_max_to_scan; | ||
915 | unsigned int s_mb_min_to_scan; | ||
916 | unsigned int s_mb_stats; | ||
917 | unsigned int s_mb_order2_reqs; | ||
918 | unsigned int s_mb_group_prealloc; | ||
919 | /* where last allocation was done - for stream allocation */ | ||
920 | unsigned long s_mb_last_group; | ||
921 | unsigned long s_mb_last_start; | ||
922 | |||
923 | /* history to debug policy */ | ||
924 | struct ext4_mb_history *s_mb_history; | ||
925 | int s_mb_history_cur; | ||
926 | int s_mb_history_max; | ||
927 | int s_mb_history_num; | ||
928 | spinlock_t s_mb_history_lock; | ||
929 | int s_mb_history_filter; | ||
930 | |||
931 | /* stats for buddy allocator */ | ||
932 | spinlock_t s_mb_pa_lock; | ||
933 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | ||
934 | atomic_t s_bal_success; /* we found long enough chunks */ | ||
935 | atomic_t s_bal_allocated; /* in blocks */ | ||
936 | atomic_t s_bal_ex_scanned; /* total extents scanned */ | ||
937 | atomic_t s_bal_goals; /* goal hits */ | ||
938 | atomic_t s_bal_breaks; /* too long searches */ | ||
939 | atomic_t s_bal_2orders; /* 2^order hits */ | ||
940 | spinlock_t s_bal_lock; | ||
941 | unsigned long s_mb_buddies_generated; | ||
942 | unsigned long long s_mb_generation_time; | ||
943 | atomic_t s_mb_lost_chunks; | ||
944 | atomic_t s_mb_preallocated; | ||
945 | atomic_t s_mb_discarded; | ||
946 | |||
947 | /* locality groups */ | ||
948 | struct ext4_locality_group *s_locality_groups; | ||
949 | |||
950 | /* for write statistics */ | ||
951 | unsigned long s_sectors_written_start; | ||
952 | u64 s_kbytes_written; | ||
953 | |||
954 | unsigned int s_log_groups_per_flex; | ||
955 | struct flex_groups *s_flex_groups; | ||
956 | }; | ||
957 | |||
692 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 958 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
693 | { | 959 | { |
694 | return sb->s_fs_info; | 960 | return sb->s_fs_info; |
@@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode) | |||
704 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 970 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
705 | } | 971 | } |
706 | 972 | ||
707 | |||
708 | static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | 973 | static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) |
709 | { | 974 | { |
710 | return ino == EXT4_ROOT_INO || | 975 | return ino == EXT4_ROOT_INO || |
@@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
1014 | ext4_group_t block_group, | 1279 | ext4_group_t block_group, |
1015 | struct buffer_head ** bh); | 1280 | struct buffer_head ** bh); |
1016 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1281 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
1282 | struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | ||
1283 | ext4_group_t block_group); | ||
1284 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, | ||
1285 | struct buffer_head *bh, | ||
1286 | ext4_group_t group, | ||
1287 | struct ext4_group_desc *desc); | ||
1288 | #define ext4_free_blocks_after_init(sb, group, desc) \ | ||
1289 | ext4_init_block_bitmap(sb, NULL, group, desc) | ||
1017 | 1290 | ||
1018 | /* dir.c */ | 1291 | /* dir.c */ |
1019 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1292 | extern int ext4_check_dir_entry(const char *, struct inode *, |
@@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); | |||
1038 | extern unsigned long ext4_count_free_inodes(struct super_block *); | 1311 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1039 | extern unsigned long ext4_count_dirs(struct super_block *); | 1312 | extern unsigned long ext4_count_dirs(struct super_block *); |
1040 | extern void ext4_check_inodes_bitmap(struct super_block *); | 1313 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1314 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | ||
1315 | struct buffer_head *bh, | ||
1316 | ext4_group_t group, | ||
1317 | struct ext4_group_desc *desc); | ||
1318 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | ||
1041 | 1319 | ||
1042 | /* mballoc.c */ | 1320 | /* mballoc.c */ |
1043 | extern long ext4_mb_stats; | 1321 | extern long ext4_mb_stats; |
@@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...) | |||
1123 | __attribute__ ((format (printf, 3, 4))); | 1401 | __attribute__ ((format (printf, 3, 4))); |
1124 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) | 1402 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) |
1125 | __attribute__ ((format (printf, 3, 4))); | 1403 | __attribute__ ((format (printf, 3, 4))); |
1404 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) | ||
1405 | __attribute__ ((format (printf, 3, 4))); | ||
1126 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, | 1406 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, |
1127 | const char *, const char *, ...) | 1407 | const char *, const char *, ...) |
1128 | __attribute__ ((format (printf, 4, 5))); | 1408 | __attribute__ ((format (printf, 4, 5))); |
@@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb, | |||
1161 | struct ext4_group_desc *bg, __u32 count); | 1441 | struct ext4_group_desc *bg, __u32 count); |
1162 | extern void ext4_itable_unused_set(struct super_block *sb, | 1442 | extern void ext4_itable_unused_set(struct super_block *sb, |
1163 | struct ext4_group_desc *bg, __u32 count); | 1443 | struct ext4_group_desc *bg, __u32 count); |
1444 | extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, | ||
1445 | struct ext4_group_desc *gdp); | ||
1446 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, | ||
1447 | struct ext4_group_desc *gdp); | ||
1164 | 1448 | ||
1165 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) | 1449 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) |
1166 | { | 1450 | { |
@@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | |||
1228 | return grp_info[indexv][indexh]; | 1512 | return grp_info[indexv][indexh]; |
1229 | } | 1513 | } |
1230 | 1514 | ||
1515 | /* | ||
1516 | * Reading s_groups_count requires using smp_rmb() afterwards. See | ||
1517 | * the locking protocol documented in the comments of ext4_group_add() | ||
1518 | * in resize.c | ||
1519 | */ | ||
1520 | static inline ext4_group_t ext4_get_groups_count(struct super_block *sb) | ||
1521 | { | ||
1522 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | ||
1523 | |||
1524 | smp_rmb(); | ||
1525 | return ngroups; | ||
1526 | } | ||
1231 | 1527 | ||
1232 | static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, | 1528 | static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, |
1233 | ext4_group_t block_group) | 1529 | ext4_group_t block_group) |
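ext4_get_groups_count() bundles the s_groups_count load with the smp_rmb() that the resize locking protocol requires, which is why the open-coded smp_rmb() calls disappear from the balloc.c hunks above. The per-group loop pattern after the change:

	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
	struct ext4_group_desc *gdp;

	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		/* ... per-group accounting ... */
	}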
@@ -1283,33 +1579,25 @@ struct ext4_group_info { | |||
1283 | }; | 1579 | }; |
1284 | 1580 | ||
1285 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 1581 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
1286 | #define EXT4_GROUP_INFO_LOCKED_BIT 1 | ||
1287 | 1582 | ||
1288 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 1583 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
1289 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 1584 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
1290 | 1585 | ||
1291 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 1586 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, |
1587 | ext4_group_t group) | ||
1292 | { | 1588 | { |
1293 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | 1589 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); |
1294 | |||
1295 | bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
1296 | } | 1590 | } |
1297 | 1591 | ||
1298 | static inline void ext4_unlock_group(struct super_block *sb, | 1592 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
1299 | ext4_group_t group) | ||
1300 | { | 1593 | { |
1301 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | 1594 | spin_lock(ext4_group_lock_ptr(sb, group)); |
1302 | |||
1303 | bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
1304 | } | 1595 | } |
1305 | 1596 | ||
1306 | static inline int ext4_is_group_locked(struct super_block *sb, | 1597 | static inline void ext4_unlock_group(struct super_block *sb, |
1307 | ext4_group_t group) | 1598 | ext4_group_t group) |
1308 | { | 1599 | { |
1309 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | 1600 | spin_unlock(ext4_group_lock_ptr(sb, group)); |
1310 | |||
1311 | return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, | ||
1312 | &(grinfo->bb_state)); | ||
1313 | } | 1601 | } |
1314 | 1602 | ||
1315 | /* | 1603 | /* |
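With the bit-spinlock in bb_state gone, ext4_lock_group() is an ordinary spin_lock() on an entry of the shared blockgroup_lock table, and ext4_group_lock_ptr() hands the raw spinlock_t to the atomic bitmap helpers. Both usages appear in the balloc.c hunks above; condensed:

	/* Descriptor update under the group lock: */
	ext4_lock_group(sb, block_group);
	ext4_free_blks_set(sb, desc, blk_free_count);
	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
	ext4_unlock_group(sb, block_group);

	/* Atomic bit ops take the raw lock pointer instead: */
	ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
			      bit, bitmap_bh->b_data);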
@@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations; | |||
1326 | /* namei.c */ | 1614 | /* namei.c */ |
1327 | extern const struct inode_operations ext4_dir_inode_operations; | 1615 | extern const struct inode_operations ext4_dir_inode_operations; |
1328 | extern const struct inode_operations ext4_special_inode_operations; | 1616 | extern const struct inode_operations ext4_special_inode_operations; |
1617 | extern struct dentry *ext4_get_parent(struct dentry *child); | ||
1329 | 1618 | ||
1330 | /* symlink.c */ | 1619 | /* symlink.c */ |
1331 | extern const struct inode_operations ext4_symlink_inode_operations; | 1620 | extern const struct inode_operations ext4_symlink_inode_operations; |
1332 | extern const struct inode_operations ext4_fast_symlink_inode_operations; | 1621 | extern const struct inode_operations ext4_fast_symlink_inode_operations; |
1333 | 1622 | ||
1623 | /* block_validity */ | ||
1624 | extern void ext4_release_system_zone(struct super_block *sb); | ||
1625 | extern int ext4_setup_system_zone(struct super_block *sb); | ||
1626 | extern int __init init_ext4_system_zone(void); | ||
1627 | extern void exit_ext4_system_zone(void); | ||
1628 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, | ||
1629 | ext4_fsblk_t start_blk, | ||
1630 | unsigned int count); | ||
1631 | |||
1334 | /* extents.c */ | 1632 | /* extents.c */ |
1335 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 1633 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
1336 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1634 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
@@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | |||
1338 | int chunk); | 1636 | int chunk); |
1339 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1637 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
1340 | ext4_lblk_t iblock, unsigned int max_blocks, | 1638 | ext4_lblk_t iblock, unsigned int max_blocks, |
1341 | struct buffer_head *bh_result, | 1639 | struct buffer_head *bh_result, int flags); |
1342 | int create, int extend_disksize); | ||
1343 | extern void ext4_ext_truncate(struct inode *); | 1640 | extern void ext4_ext_truncate(struct inode *); |
1344 | extern void ext4_ext_init(struct super_block *); | 1641 | extern void ext4_ext_init(struct super_block *); |
1345 | extern void ext4_ext_release(struct super_block *); | 1642 | extern void ext4_ext_release(struct super_block *); |
1346 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | 1643 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, |
1347 | loff_t len); | 1644 | loff_t len); |
1348 | extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, | 1645 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, |
1349 | sector_t block, unsigned int max_blocks, | 1646 | sector_t block, unsigned int max_blocks, |
1350 | struct buffer_head *bh, int create, | 1647 | struct buffer_head *bh, int flags); |
1351 | int extend_disksize, int flag); | ||
1352 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1648 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1353 | __u64 start, __u64 len); | 1649 | __u64 start, __u64 len); |
1354 | 1650 | ||
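Both get_blocks entry points now take a single flags word in place of the old create/extend_disksize/flag argument triple. The flag names below all appear in this patch; the bit values and the composite define are assumptions shown only to illustrate how old call sites map onto the new interface:

	/* Assumed bit layout -- only the names are taken from the patch. */
	#define EXT4_GET_BLOCKS_CREATE                  0x0001
	#define EXT4_GET_BLOCKS_UNINIT_EXT              0x0002
	#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT \
		(EXT4_GET_BLOCKS_CREATE | EXT4_GET_BLOCKS_UNINIT_EXT)
	#define EXT4_GET_BLOCKS_DELALLOC_RESERVE        0x0004
	#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE    0x0008

	/* Old: ext4_get_blocks_wrap(handle, inode, blk, max, bh, 1, 0, 1);
	 * New: ext4_get_blocks(handle, inode, blk, max, bh,
	 *                      EXT4_GET_BLOCKS_CREATE |
	 *                      EXT4_GET_BLOCKS_DELALLOC_RESERVE);
	 * A plain lookup passes 0 where it used to pass create == 0. */

Note that extend_disksize has no flag equivalent: as the hunks in extents.c and inode.c below show, the i_disksize update logic is deleted from the get_blocks paths entirely rather than translated.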
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h deleted file mode 100644 index 4ce2187123aa..000000000000 --- a/fs/ext4/ext4_i.h +++ /dev/null | |||
@@ -1,140 +0,0 @@ | |||
1 | /* | ||
2 | * ext4_i.h | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/include/linux/minix_fs_i.h | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | */ | ||
15 | |||
16 | #ifndef _EXT4_I | ||
17 | #define _EXT4_I | ||
18 | |||
19 | #include <linux/rwsem.h> | ||
20 | #include <linux/rbtree.h> | ||
21 | #include <linux/seqlock.h> | ||
22 | #include <linux/mutex.h> | ||
23 | |||
24 | /* data type for block offset of block group */ | ||
25 | typedef int ext4_grpblk_t; | ||
26 | |||
27 | /* data type for filesystem-wide blocks number */ | ||
28 | typedef unsigned long long ext4_fsblk_t; | ||
29 | |||
30 | /* data type for file logical block number */ | ||
31 | typedef __u32 ext4_lblk_t; | ||
32 | |||
33 | /* data type for block group number */ | ||
34 | typedef unsigned int ext4_group_t; | ||
35 | |||
36 | /* | ||
37 | * storage for cached extent | ||
38 | */ | ||
39 | struct ext4_ext_cache { | ||
40 | ext4_fsblk_t ec_start; | ||
41 | ext4_lblk_t ec_block; | ||
42 | __u32 ec_len; /* must be 32bit to return holes */ | ||
43 | __u32 ec_type; | ||
44 | }; | ||
45 | |||
46 | /* | ||
47 | * fourth extended file system inode data in memory | ||
48 | */ | ||
49 | struct ext4_inode_info { | ||
50 | __le32 i_data[15]; /* unconverted */ | ||
51 | __u32 i_flags; | ||
52 | ext4_fsblk_t i_file_acl; | ||
53 | __u32 i_dtime; | ||
54 | |||
55 | /* | ||
56 | * i_block_group is the number of the block group which contains | ||
57 | * this file's inode. Constant across the lifetime of the inode, | ||
58 | * it is used for making block allocation decisions - we try to | ||
59 | * place a file's data blocks near its inode block, and new inodes | ||
60 | * near to their parent directory's inode. | ||
61 | */ | ||
62 | ext4_group_t i_block_group; | ||
63 | __u32 i_state; /* Dynamic state flags for ext4 */ | ||
64 | |||
65 | ext4_lblk_t i_dir_start_lookup; | ||
66 | #ifdef CONFIG_EXT4_FS_XATTR | ||
67 | /* | ||
68 | * Extended attributes can be read independently of the main file | ||
69 | * data. Taking i_mutex even when reading would cause contention | ||
70 | * between readers of EAs and writers of regular file data, so | ||
71 | * instead we synchronize on xattr_sem when reading or changing | ||
72 | * EAs. | ||
73 | */ | ||
74 | struct rw_semaphore xattr_sem; | ||
75 | #endif | ||
76 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | ||
77 | struct posix_acl *i_acl; | ||
78 | struct posix_acl *i_default_acl; | ||
79 | #endif | ||
80 | |||
81 | struct list_head i_orphan; /* unlinked but open inodes */ | ||
82 | |||
83 | /* | ||
84 | * i_disksize keeps track of what the inode size is ON DISK, not | ||
85 | * in memory. During truncate, i_size is set to the new size by | ||
86 | * the VFS prior to calling ext4_truncate(), but the filesystem won't | ||
87 | * set i_disksize to 0 until the truncate is actually under way. | ||
88 | * | ||
89 | * The intent is that i_disksize always represents the blocks which | ||
90 | * are used by this file. This allows recovery to restart truncate | ||
91 | * on orphans if we crash during truncate. We actually write i_disksize | ||
92 | * into the on-disk inode when writing inodes out, instead of i_size. | ||
93 | * | ||
94 | * The only time when i_disksize and i_size may be different is when | ||
95 | * a truncate is in progress. The only things which change i_disksize | ||
96 | * are ext4_get_block (growth) and ext4_truncate (shrinkth). | ||
97 | */ | ||
98 | loff_t i_disksize; | ||
99 | |||
100 | /* | ||
101 | * i_data_sem is for serialising ext4_truncate() against | ||
102 | * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's | ||
103 | * data tree are chopped off during truncate. We can't do that in | ||
104 | * ext4 because whenever we perform intermediate commits during | ||
105 | * truncate, the inode and all the metadata blocks *must* be in a | ||
106 | * consistent state which allows truncation of the orphans to restart | ||
107 | * during recovery. Hence we must fix the get_block-vs-truncate race | ||
108 | * by other means, so we have i_data_sem. | ||
109 | */ | ||
110 | struct rw_semaphore i_data_sem; | ||
111 | struct inode vfs_inode; | ||
112 | struct jbd2_inode jinode; | ||
113 | |||
114 | struct ext4_ext_cache i_cached_extent; | ||
115 | /* | ||
116 | * File creation time. Its function is same as that of | ||
117 | * struct timespec i_{a,c,m}time in the generic inode. | ||
118 | */ | ||
119 | struct timespec i_crtime; | ||
120 | |||
121 | /* mballoc */ | ||
122 | struct list_head i_prealloc_list; | ||
123 | spinlock_t i_prealloc_lock; | ||
124 | |||
125 | /* ialloc */ | ||
126 | ext4_group_t i_last_alloc_group; | ||
127 | |||
128 | /* allocation reservation info for delalloc */ | ||
129 | unsigned int i_reserved_data_blocks; | ||
130 | unsigned int i_reserved_meta_blocks; | ||
131 | unsigned int i_allocated_meta_blocks; | ||
132 | unsigned short i_delalloc_reserved_flag; | ||
133 | |||
134 | /* on-disk additional length */ | ||
135 | __u16 i_extra_isize; | ||
136 | |||
137 | spinlock_t i_block_reservation_lock; | ||
138 | }; | ||
139 | |||
140 | #endif /* _EXT4_I */ | ||
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h deleted file mode 100644 index 57b71fefbccf..000000000000 --- a/fs/ext4/ext4_sb.h +++ /dev/null | |||
@@ -1,161 +0,0 @@ | |||
1 | /* | ||
2 | * ext4_sb.h | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/include/linux/minix_fs_sb.h | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | */ | ||
15 | |||
16 | #ifndef _EXT4_SB | ||
17 | #define _EXT4_SB | ||
18 | |||
19 | #ifdef __KERNEL__ | ||
20 | #include <linux/timer.h> | ||
21 | #include <linux/wait.h> | ||
22 | #include <linux/blockgroup_lock.h> | ||
23 | #include <linux/percpu_counter.h> | ||
24 | #endif | ||
25 | #include <linux/rbtree.h> | ||
26 | |||
27 | /* | ||
28 | * fourth extended-fs super-block data in memory | ||
29 | */ | ||
30 | struct ext4_sb_info { | ||
31 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ | ||
32 | unsigned long s_inodes_per_block;/* Number of inodes per block */ | ||
33 | unsigned long s_blocks_per_group;/* Number of blocks in a group */ | ||
34 | unsigned long s_inodes_per_group;/* Number of inodes in a group */ | ||
35 | unsigned long s_itb_per_group; /* Number of inode table blocks per group */ | ||
36 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | ||
37 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | ||
38 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | ||
39 | unsigned long s_overhead_last; /* Last calculated overhead */ | ||
40 | unsigned long s_blocks_last; /* Last seen block count */ | ||
41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | ||
42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | ||
43 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ | ||
44 | struct buffer_head **s_group_desc; | ||
45 | unsigned long s_mount_opt; | ||
46 | ext4_fsblk_t s_sb_block; | ||
47 | uid_t s_resuid; | ||
48 | gid_t s_resgid; | ||
49 | unsigned short s_mount_state; | ||
50 | unsigned short s_pad; | ||
51 | int s_addr_per_block_bits; | ||
52 | int s_desc_per_block_bits; | ||
53 | int s_inode_size; | ||
54 | int s_first_ino; | ||
55 | unsigned int s_inode_readahead_blks; | ||
56 | spinlock_t s_next_gen_lock; | ||
57 | u32 s_next_generation; | ||
58 | u32 s_hash_seed[4]; | ||
59 | int s_def_hash_version; | ||
60 | int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ | ||
61 | struct percpu_counter s_freeblocks_counter; | ||
62 | struct percpu_counter s_freeinodes_counter; | ||
63 | struct percpu_counter s_dirs_counter; | ||
64 | struct percpu_counter s_dirtyblocks_counter; | ||
65 | struct blockgroup_lock *s_blockgroup_lock; | ||
66 | struct proc_dir_entry *s_proc; | ||
67 | struct kobject s_kobj; | ||
68 | struct completion s_kobj_unregister; | ||
69 | |||
70 | /* Journaling */ | ||
71 | struct inode *s_journal_inode; | ||
72 | struct journal_s *s_journal; | ||
73 | struct list_head s_orphan; | ||
74 | unsigned long s_commit_interval; | ||
75 | u32 s_max_batch_time; | ||
76 | u32 s_min_batch_time; | ||
77 | struct block_device *journal_bdev; | ||
78 | #ifdef CONFIG_JBD2_DEBUG | ||
79 | struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ | ||
80 | wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ | ||
81 | #endif | ||
82 | #ifdef CONFIG_QUOTA | ||
83 | char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ | ||
84 | int s_jquota_fmt; /* Format of quota to use */ | ||
85 | #endif | ||
86 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ | ||
87 | |||
88 | #ifdef EXTENTS_STATS | ||
89 | /* ext4 extents stats */ | ||
90 | unsigned long s_ext_min; | ||
91 | unsigned long s_ext_max; | ||
92 | unsigned long s_depth_max; | ||
93 | spinlock_t s_ext_stats_lock; | ||
94 | unsigned long s_ext_blocks; | ||
95 | unsigned long s_ext_extents; | ||
96 | #endif | ||
97 | |||
98 | /* for buddy allocator */ | ||
99 | struct ext4_group_info ***s_group_info; | ||
100 | struct inode *s_buddy_cache; | ||
101 | long s_blocks_reserved; | ||
102 | spinlock_t s_reserve_lock; | ||
103 | spinlock_t s_md_lock; | ||
104 | tid_t s_last_transaction; | ||
105 | unsigned short *s_mb_offsets; | ||
106 | unsigned int *s_mb_maxs; | ||
107 | |||
108 | /* tunables */ | ||
109 | unsigned long s_stripe; | ||
110 | unsigned int s_mb_stream_request; | ||
111 | unsigned int s_mb_max_to_scan; | ||
112 | unsigned int s_mb_min_to_scan; | ||
113 | unsigned int s_mb_stats; | ||
114 | unsigned int s_mb_order2_reqs; | ||
115 | unsigned int s_mb_group_prealloc; | ||
116 | /* where last allocation was done - for stream allocation */ | ||
117 | unsigned long s_mb_last_group; | ||
118 | unsigned long s_mb_last_start; | ||
119 | |||
120 | /* history to debug policy */ | ||
121 | struct ext4_mb_history *s_mb_history; | ||
122 | int s_mb_history_cur; | ||
123 | int s_mb_history_max; | ||
124 | int s_mb_history_num; | ||
125 | spinlock_t s_mb_history_lock; | ||
126 | int s_mb_history_filter; | ||
127 | |||
128 | /* stats for buddy allocator */ | ||
129 | spinlock_t s_mb_pa_lock; | ||
130 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | ||
131 | atomic_t s_bal_success; /* we found long enough chunks */ | ||
132 | atomic_t s_bal_allocated; /* in blocks */ | ||
133 | atomic_t s_bal_ex_scanned; /* total extents scanned */ | ||
134 | atomic_t s_bal_goals; /* goal hits */ | ||
135 | atomic_t s_bal_breaks; /* too long searches */ | ||
136 | atomic_t s_bal_2orders; /* 2^order hits */ | ||
137 | spinlock_t s_bal_lock; | ||
138 | unsigned long s_mb_buddies_generated; | ||
139 | unsigned long long s_mb_generation_time; | ||
140 | atomic_t s_mb_lost_chunks; | ||
141 | atomic_t s_mb_preallocated; | ||
142 | atomic_t s_mb_discarded; | ||
143 | |||
144 | /* locality groups */ | ||
145 | struct ext4_locality_group *s_locality_groups; | ||
146 | |||
147 | /* for write statistics */ | ||
148 | unsigned long s_sectors_written_start; | ||
149 | u64 s_kbytes_written; | ||
150 | |||
151 | unsigned int s_log_groups_per_flex; | ||
152 | struct flex_groups *s_flex_groups; | ||
153 | }; | ||
154 | |||
155 | static inline spinlock_t * | ||
156 | sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group) | ||
157 | { | ||
158 | return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); | ||
159 | } | ||
160 | |||
161 | #endif /* _EXT4_SB */ | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e3a55eb8b26a..2593f748c3a4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
326 | 326 | ||
327 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | 327 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) |
328 | { | 328 | { |
329 | ext4_fsblk_t block = ext_pblock(ext), valid_block; | 329 | ext4_fsblk_t block = ext_pblock(ext); |
330 | int len = ext4_ext_get_actual_len(ext); | 330 | int len = ext4_ext_get_actual_len(ext); |
331 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
332 | 331 | ||
333 | valid_block = le32_to_cpu(es->s_first_data_block) + | 332 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
334 | EXT4_SB(inode->i_sb)->s_gdb_count; | ||
335 | if (unlikely(block <= valid_block || | ||
336 | ((block + len) > ext4_blocks_count(es)))) | ||
337 | return 0; | ||
338 | else | ||
339 | return 1; | ||
340 | } | 333 | } |
341 | 334 | ||
342 | static int ext4_valid_extent_idx(struct inode *inode, | 335 | static int ext4_valid_extent_idx(struct inode *inode, |
343 | struct ext4_extent_idx *ext_idx) | 336 | struct ext4_extent_idx *ext_idx) |
344 | { | 337 | { |
345 | ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; | 338 | ext4_fsblk_t block = idx_pblock(ext_idx); |
346 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
347 | 339 | ||
348 | valid_block = le32_to_cpu(es->s_first_data_block) + | 340 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); |
349 | EXT4_SB(inode->i_sb)->s_gdb_count; | ||
350 | if (unlikely(block <= valid_block || | ||
351 | (block >= ext4_blocks_count(es)))) | ||
352 | return 0; | ||
353 | else | ||
354 | return 1; | ||
355 | } | 341 | } |
356 | 342 | ||
357 | static int ext4_valid_extent_entries(struct inode *inode, | 343 | static int ext4_valid_extent_entries(struct inode *inode, |
@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2097 | ex = EXT_LAST_EXTENT(eh); | 2083 | ex = EXT_LAST_EXTENT(eh); |
2098 | 2084 | ||
2099 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2085 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2100 | if (ext4_ext_is_uninitialized(ex)) | ||
2101 | uninitialized = 1; | ||
2102 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2086 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2103 | 2087 | ||
2104 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2088 | while (ex >= EXT_FIRST_EXTENT(eh) && |
2105 | ex_ee_block + ex_ee_len > start) { | 2089 | ex_ee_block + ex_ee_len > start) { |
2090 | |||
2091 | if (ext4_ext_is_uninitialized(ex)) | ||
2092 | uninitialized = 1; | ||
2093 | else | ||
2094 | uninitialized = 0; | ||
2095 | |||
2106 | ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); | 2096 | ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); |
2107 | path[depth].p_ext = ex; | 2097 | path[depth].p_ext = ex; |
2108 | 2098 | ||
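The fix above is subtle: "uninitialized" was sampled once from the last extent before the loop, then applied to every extent visited while walking backwards through the leaf, so a leaf mixing initialized and uninitialized extents could be removed with the wrong flag. Recomputing it per iteration keeps the flag in sync with the extent actually being removed. A toy demonstration of the stale-flag behavior (made-up structure, not the real types):

	#include <stdio.h>

	struct ext { int uninit; };

	int main(void)
	{
		struct ext leaf[3] = { {0}, {1}, {0} };   /* mixed leaf */
		int i, uninitialized;

		/* Old shape: flag frozen at the value of the last extent. */
		uninitialized = leaf[2].uninit;
		for (i = 2; i >= 0; i--)
			printf("old: ext %d treated as uninit=%d\n",
			       i, uninitialized);   /* wrong for leaf[1] */

		/* New shape: flag tracks the current extent. */
		for (i = 2; i >= 0; i--) {
			uninitialized = leaf[i].uninit;
			printf("new: ext %d treated as uninit=%d\n",
			       i, uninitialized);
		}
		return 0;
	}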
@@ -2784,7 +2774,7 @@ fix_extent_len: | |||
2784 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 2774 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
2785 | ext4_lblk_t iblock, | 2775 | ext4_lblk_t iblock, |
2786 | unsigned int max_blocks, struct buffer_head *bh_result, | 2776 | unsigned int max_blocks, struct buffer_head *bh_result, |
2787 | int create, int extend_disksize) | 2777 | int flags) |
2788 | { | 2778 | { |
2789 | struct ext4_ext_path *path = NULL; | 2779 | struct ext4_ext_path *path = NULL; |
2790 | struct ext4_extent_header *eh; | 2780 | struct ext4_extent_header *eh; |
@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2793 | int err = 0, depth, ret, cache_type; | 2783 | int err = 0, depth, ret, cache_type; |
2794 | unsigned int allocated = 0; | 2784 | unsigned int allocated = 0; |
2795 | struct ext4_allocation_request ar; | 2785 | struct ext4_allocation_request ar; |
2796 | loff_t disksize; | ||
2797 | 2786 | ||
2798 | __clear_bit(BH_New, &bh_result->b_state); | 2787 | __clear_bit(BH_New, &bh_result->b_state); |
2799 | ext_debug("blocks %u/%u requested for inode %u\n", | 2788 | ext_debug("blocks %u/%u requested for inode %u\n", |
@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2803 | cache_type = ext4_ext_in_cache(inode, iblock, &newex); | 2792 | cache_type = ext4_ext_in_cache(inode, iblock, &newex); |
2804 | if (cache_type) { | 2793 | if (cache_type) { |
2805 | if (cache_type == EXT4_EXT_CACHE_GAP) { | 2794 | if (cache_type == EXT4_EXT_CACHE_GAP) { |
2806 | if (!create) { | 2795 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
2807 | /* | 2796 | /* |
2808 | * block isn't allocated yet and | 2797 | * block isn't allocated yet and |
2809 | * user doesn't want to allocate it | 2798 | * user doesn't want to allocate it |
@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2869 | EXT4_EXT_CACHE_EXTENT); | 2858 | EXT4_EXT_CACHE_EXTENT); |
2870 | goto out; | 2859 | goto out; |
2871 | } | 2860 | } |
2872 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) | 2861 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) |
2873 | goto out; | 2862 | goto out; |
2874 | if (!create) { | 2863 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
2864 | if (allocated > max_blocks) | ||
2865 | allocated = max_blocks; | ||
2875 | /* | 2866 | /* |
2876 | * We have blocks reserved already. We | 2867 | * We have blocks reserved already. We |
2877 | * return allocated blocks so that delalloc | 2868 | * return allocated blocks so that delalloc |
@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2879 | * the buffer head will be unmapped so that | 2870 | * the buffer head will be unmapped so that |
2880 | * a read from the block returns 0s. | 2871 | * a read from the block returns 0s. |
2881 | */ | 2872 | */ |
2882 | if (allocated > max_blocks) | ||
2883 | allocated = max_blocks; | ||
2884 | set_buffer_unwritten(bh_result); | 2873 | set_buffer_unwritten(bh_result); |
2885 | bh_result->b_bdev = inode->i_sb->s_bdev; | 2874 | bh_result->b_bdev = inode->i_sb->s_bdev; |
2886 | bh_result->b_blocknr = newblock; | 2875 | bh_result->b_blocknr = newblock; |
@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2903 | * requested block isn't allocated yet; | 2892 | * requested block isn't allocated yet; |
2904 | * we can't create the block if the create flag is zero | 2893 |
2905 | */ | 2894 | */ |
2906 | if (!create) { | 2895 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
2907 | /* | 2896 | /* |
2908 | * put just found gap into cache to speed up | 2897 | * put just found gap into cache to speed up |
2909 | * subsequent requests | 2898 | * subsequent requests |
@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2932 | * EXT_UNINIT_MAX_LEN. | 2921 | * EXT_UNINIT_MAX_LEN. |
2933 | */ | 2922 | */ |
2934 | if (max_blocks > EXT_INIT_MAX_LEN && | 2923 | if (max_blocks > EXT_INIT_MAX_LEN && |
2935 | create != EXT4_CREATE_UNINITIALIZED_EXT) | 2924 | !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) |
2936 | max_blocks = EXT_INIT_MAX_LEN; | 2925 | max_blocks = EXT_INIT_MAX_LEN; |
2937 | else if (max_blocks > EXT_UNINIT_MAX_LEN && | 2926 | else if (max_blocks > EXT_UNINIT_MAX_LEN && |
2938 | create == EXT4_CREATE_UNINITIALIZED_EXT) | 2927 | (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) |
2939 | max_blocks = EXT_UNINIT_MAX_LEN; | 2928 | max_blocks = EXT_UNINIT_MAX_LEN; |
2940 | 2929 | ||
2941 | /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ | 2930 | /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ |
@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2966 | /* try to insert new extent into found leaf and return */ | 2955 | /* try to insert new extent into found leaf and return */ |
2967 | ext4_ext_store_pblock(&newex, newblock); | 2956 | ext4_ext_store_pblock(&newex, newblock); |
2968 | newex.ee_len = cpu_to_le16(ar.len); | 2957 | newex.ee_len = cpu_to_le16(ar.len); |
2969 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ | 2958 | if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */ |
2970 | ext4_ext_mark_uninitialized(&newex); | 2959 | ext4_ext_mark_uninitialized(&newex); |
2971 | err = ext4_ext_insert_extent(handle, inode, path, &newex); | 2960 | err = ext4_ext_insert_extent(handle, inode, path, &newex); |
2972 | if (err) { | 2961 | if (err) { |
@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2983 | newblock = ext_pblock(&newex); | 2972 | newblock = ext_pblock(&newex); |
2984 | allocated = ext4_ext_get_actual_len(&newex); | 2973 | allocated = ext4_ext_get_actual_len(&newex); |
2985 | outnew: | 2974 | outnew: |
2986 | if (extend_disksize) { | ||
2987 | disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits; | ||
2988 | if (disksize > i_size_read(inode)) | ||
2989 | disksize = i_size_read(inode); | ||
2990 | if (disksize > EXT4_I(inode)->i_disksize) | ||
2991 | EXT4_I(inode)->i_disksize = disksize; | ||
2992 | } | ||
2993 | |||
2994 | set_buffer_new(bh_result); | 2975 | set_buffer_new(bh_result); |
2995 | 2976 | ||
2996 | /* Cache only when it is _not_ an uninitialized extent */ | 2977 | /* Cache only when it is _not_ an uninitialized extent */ |
2997 | if (create != EXT4_CREATE_UNINITIALIZED_EXT) | 2978 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) |
2998 | ext4_ext_put_in_cache(inode, iblock, allocated, newblock, | 2979 | ext4_ext_put_in_cache(inode, iblock, allocated, newblock, |
2999 | EXT4_EXT_CACHE_EXTENT); | 2980 | EXT4_EXT_CACHE_EXTENT); |
3000 | out: | 2981 | out: |
@@ -3150,9 +3131,10 @@ retry: | |||
3150 | ret = PTR_ERR(handle); | 3131 | ret = PTR_ERR(handle); |
3151 | break; | 3132 | break; |
3152 | } | 3133 | } |
3153 | ret = ext4_get_blocks_wrap(handle, inode, block, | 3134 | map_bh.b_state = 0; |
3154 | max_blocks, &map_bh, | 3135 | ret = ext4_get_blocks(handle, inode, block, |
3155 | EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); | 3136 | max_blocks, &map_bh, |
3137 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); | ||
3156 | if (ret <= 0) { | 3138 | if (ret <= 0) { |
3157 | #ifdef EXT4FS_DEBUG | 3139 | #ifdef EXT4FS_DEBUG |
3158 | WARN_ON(ret <= 0); | 3140 | WARN_ON(ret <= 0); |
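Besides switching fallocate to the flags API, the hunk above adds map_bh.b_state = 0 before each call: buffer-head state bits (mapped/new/unwritten) set by one loop iteration would otherwise leak into the next lookup. A toy illustration of the hazard with a plain flag word (names assumed):

	#include <stdio.h>

	#define BH_MAPPED 0x1

	static void fake_get_blocks(unsigned long *state, int found)
	{
		if (found)
			*state |= BH_MAPPED;   /* sets bits, never clears */
	}

	int main(void)
	{
		unsigned long state = 0;

		fake_get_blocks(&state, 1);    /* iteration 1: mapped */
		fake_get_blocks(&state, 0);    /* iteration 2: no reset... */
		printf("stale: %lx\n", state); /* 0x1, though nothing found */

		state = 0;                     /* the added per-loop reset */
		fake_get_blocks(&state, 0);
		printf("clean: %lx\n", state); /* 0x0 */
		return 0;
	}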
@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | |||
3195 | void *data) | 3177 | void *data) |
3196 | { | 3178 | { |
3197 | struct fiemap_extent_info *fieinfo = data; | 3179 | struct fiemap_extent_info *fieinfo = data; |
3198 | unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; | 3180 | unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; |
3199 | __u64 logical; | 3181 | __u64 logical; |
3200 | __u64 physical; | 3182 | __u64 physical; |
3201 | __u64 length; | 3183 | __u64 length; |
@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | |||
3242 | * | 3224 | * |
3243 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | 3225 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK |
3244 | */ | 3226 | */ |
3245 | if (logical + length - 1 == EXT_MAX_BLOCK || | 3227 | if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || |
3246 | ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) | 3228 | newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { |
3229 | loff_t size = i_size_read(inode); | ||
3230 | loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); | ||
3231 | |||
3247 | flags |= FIEMAP_EXTENT_LAST; | 3232 | flags |= FIEMAP_EXTENT_LAST; |
3233 | if ((flags & FIEMAP_EXTENT_DELALLOC) && | ||
3234 | logical+length > size) | ||
3235 | length = (size - logical + bs - 1) & ~(bs-1); | ||
3236 | } | ||
3248 | 3237 | ||
3249 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | 3238 | error = fiemap_fill_next_extent(fieinfo, logical, physical, |
3250 | length, flags); | 3239 | length, flags); |
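The new delalloc clamp rounds the reported length up to a whole block with (size - logical + bs - 1) & ~(bs - 1), the usual round-up trick for a power-of-two block size. A quick check of the arithmetic (values made up):

	#include <stdio.h>

	int main(void)
	{
		long long size = 10000;    /* i_size */
		long long logical = 4096;  /* byte offset of the last extent */
		long long bs = 4096;       /* block size, power of two */

		long long length = (size - logical + bs - 1) & ~(bs - 1);
		printf("%lld\n", length);  /* 8192: 5904 bytes -> 2 blocks */
		return 0;
	}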
@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3318 | * Walk the extent tree gathering extent information. | 3307 | * Walk the extent tree gathering extent information. |
3319 | * ext4_ext_fiemap_cb will push extents back to user. | 3308 | * ext4_ext_fiemap_cb will push extents back to user. |
3320 | */ | 3309 | */ |
3321 | down_write(&EXT4_I(inode)->i_data_sem); | 3310 | down_read(&EXT4_I(inode)->i_data_sem); |
3322 | error = ext4_ext_walk_space(inode, start_blk, len_blks, | 3311 | error = ext4_ext_walk_space(inode, start_blk, len_blks, |
3323 | ext4_ext_fiemap_cb, fieinfo); | 3312 | ext4_ext_fiemap_cb, fieinfo); |
3324 | up_write(&EXT4_I(inode)->i_data_sem); | 3313 | up_read(&EXT4_I(inode)->i_data_sem); |
3325 | } | 3314 | } |
3326 | 3315 | ||
3327 | return error; | 3316 | return error; |
diff --git a/fs/ext4/group.h b/fs/ext4/group.h deleted file mode 100644 index c2c0a8d06d0e..000000000000 --- a/fs/ext4/group.h +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/group.h | ||
3 | * | ||
4 | * Copyright (C) 2007 Cluster File Systems, Inc | ||
5 | * | ||
6 | * Author: Andreas Dilger <adilger@clusterfs.com> | ||
7 | */ | ||
8 | |||
9 | #ifndef _LINUX_EXT4_GROUP_H | ||
10 | #define _LINUX_EXT4_GROUP_H | ||
11 | |||
12 | extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, | ||
13 | struct ext4_group_desc *gdp); | ||
14 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, | ||
15 | struct ext4_group_desc *gdp); | ||
16 | struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | ||
17 | ext4_group_t block_group); | ||
18 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, | ||
19 | struct buffer_head *bh, | ||
20 | ext4_group_t group, | ||
21 | struct ext4_group_desc *desc); | ||
22 | #define ext4_free_blocks_after_init(sb, group, desc) \ | ||
23 | ext4_init_block_bitmap(sb, NULL, group, desc) | ||
24 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | ||
25 | struct buffer_head *bh, | ||
26 | ext4_group_t group, | ||
27 | struct ext4_group_desc *desc); | ||
28 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | ||
29 | #endif /* _LINUX_EXT4_GROUP_H */ | ||
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f18e0a08a6b5..3743bd849bce 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include "ext4_jbd2.h" | 27 | #include "ext4_jbd2.h" |
28 | #include "xattr.h" | 28 | #include "xattr.h" |
29 | #include "acl.h" | 29 | #include "acl.h" |
30 | #include "group.h" | ||
31 | 30 | ||
32 | /* | 31 | /* |
33 | * ialloc.c contains the inodes allocation and deallocation routines | 32 | * ialloc.c contains the inodes allocation and deallocation routines |
@@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
123 | unlock_buffer(bh); | 122 | unlock_buffer(bh); |
124 | return bh; | 123 | return bh; |
125 | } | 124 | } |
126 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 125 | ext4_lock_group(sb, block_group); |
127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 126 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 127 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
129 | set_bitmap_uptodate(bh); | 128 | set_bitmap_uptodate(bh); |
130 | set_buffer_uptodate(bh); | 129 | set_buffer_uptodate(bh); |
131 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 130 | ext4_unlock_group(sb, block_group); |
132 | unlock_buffer(bh); | 131 | unlock_buffer(bh); |
133 | return bh; | 132 | return bh; |
134 | } | 133 | } |
135 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 134 | ext4_unlock_group(sb, block_group); |
136 | if (buffer_uptodate(bh)) { | 135 | if (buffer_uptodate(bh)) { |
137 | /* | 136 | /* |
138 | * if not uninit if bh is uptodate, | 137 | * if not uninit if bh is uptodate, |
@@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
247 | goto error_return; | 246 | goto error_return; |
248 | 247 | ||
249 | /* Ok, now we can actually update the inode bitmaps.. */ | 248 | /* Ok, now we can actually update the inode bitmaps.. */ |
250 | spin_lock(sb_bgl_lock(sbi, block_group)); | 249 | cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), |
251 | cleared = ext4_clear_bit(bit, bitmap_bh->b_data); | 250 | bit, bitmap_bh->b_data); |
252 | spin_unlock(sb_bgl_lock(sbi, block_group)); | ||
253 | if (!cleared) | 251 | if (!cleared) |
254 | ext4_error(sb, "ext4_free_inode", | 252 | ext4_error(sb, "ext4_free_inode", |
255 | "bit already cleared for inode %lu", ino); | 253 | "bit already cleared for inode %lu", ino); |
@@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
261 | if (fatal) goto error_return; | 259 | if (fatal) goto error_return; |
262 | 260 | ||
263 | if (gdp) { | 261 | if (gdp) { |
264 | spin_lock(sb_bgl_lock(sbi, block_group)); | 262 | ext4_lock_group(sb, block_group); |
265 | count = ext4_free_inodes_count(sb, gdp) + 1; | 263 | count = ext4_free_inodes_count(sb, gdp) + 1; |
266 | ext4_free_inodes_set(sb, gdp, count); | 264 | ext4_free_inodes_set(sb, gdp, count); |
267 | if (is_directory) { | 265 | if (is_directory) { |
@@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
277 | } | 275 | } |
278 | gdp->bg_checksum = ext4_group_desc_csum(sbi, | 276 | gdp->bg_checksum = ext4_group_desc_csum(sbi, |
279 | block_group, gdp); | 277 | block_group, gdp); |
280 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 278 | ext4_unlock_group(sb, block_group); |
281 | percpu_counter_inc(&sbi->s_freeinodes_counter); | 279 | percpu_counter_inc(&sbi->s_freeinodes_counter); |
282 | if (is_directory) | 280 | if (is_directory) |
283 | percpu_counter_dec(&sbi->s_dirs_counter); | 281 | percpu_counter_dec(&sbi->s_dirs_counter); |
@@ -316,7 +314,7 @@ error_return: | |||
316 | static int find_group_dir(struct super_block *sb, struct inode *parent, | 314 | static int find_group_dir(struct super_block *sb, struct inode *parent, |
317 | ext4_group_t *best_group) | 315 | ext4_group_t *best_group) |
318 | { | 316 | { |
319 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 317 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
320 | unsigned int freei, avefreei; | 318 | unsigned int freei, avefreei; |
321 | struct ext4_group_desc *desc, *best_desc = NULL; | 319 | struct ext4_group_desc *desc, *best_desc = NULL; |
322 | ext4_group_t group; | 320 | ext4_group_t group; |
@@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, | |||
349 | { | 347 | { |
350 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 348 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
351 | struct ext4_group_desc *desc; | 349 | struct ext4_group_desc *desc; |
352 | struct buffer_head *bh; | ||
353 | struct flex_groups *flex_group = sbi->s_flex_groups; | 350 | struct flex_groups *flex_group = sbi->s_flex_groups; |
354 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 351 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
355 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); | 352 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); |
356 | ext4_group_t ngroups = sbi->s_groups_count; | 353 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
357 | int flex_size = ext4_flex_bg_size(sbi); | 354 | int flex_size = ext4_flex_bg_size(sbi); |
358 | ext4_group_t best_flex = parent_fbg_group; | 355 | ext4_group_t best_flex = parent_fbg_group; |
359 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; | 356 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; |
@@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, | |||
362 | ext4_group_t n_fbg_groups; | 359 | ext4_group_t n_fbg_groups; |
363 | ext4_group_t i; | 360 | ext4_group_t i; |
364 | 361 | ||
365 | n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> | 362 | n_fbg_groups = (ngroups + flex_size - 1) >> |
366 | sbi->s_log_groups_per_flex; | 363 | sbi->s_log_groups_per_flex; |
367 | 364 | ||
368 | find_close_to_parent: | 365 | find_close_to_parent: |
@@ -404,7 +401,7 @@ find_close_to_parent: | |||
404 | found_flexbg: | 401 | found_flexbg: |
405 | for (i = best_flex * flex_size; i < ngroups && | 402 | for (i = best_flex * flex_size; i < ngroups && |
406 | i < (best_flex + 1) * flex_size; i++) { | 403 | i < (best_flex + 1) * flex_size; i++) { |
407 | desc = ext4_get_group_desc(sb, i, &bh); | 404 | desc = ext4_get_group_desc(sb, i, NULL); |
408 | if (ext4_free_inodes_count(sb, desc)) { | 405 | if (ext4_free_inodes_count(sb, desc)) { |
409 | *best_group = i; | 406 | *best_group = i; |
410 | goto out; | 407 | goto out; |
@@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
478 | { | 475 | { |
479 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 476 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
480 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 477 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
481 | ext4_group_t ngroups = sbi->s_groups_count; | 478 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); |
482 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 479 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
483 | unsigned int freei, avefreei; | 480 | unsigned int freei, avefreei; |
484 | ext4_fsblk_t freeb, avefreeb; | 481 | ext4_fsblk_t freeb, avefreeb; |
485 | unsigned int ndirs; | 482 | unsigned int ndirs; |
486 | int max_dirs, min_inodes; | 483 | int max_dirs, min_inodes; |
487 | ext4_grpblk_t min_blocks; | 484 | ext4_grpblk_t min_blocks; |
488 | ext4_group_t i, grp, g; | 485 | ext4_group_t i, grp, g, ngroups; |
489 | struct ext4_group_desc *desc; | 486 | struct ext4_group_desc *desc; |
490 | struct orlov_stats stats; | 487 | struct orlov_stats stats; |
491 | int flex_size = ext4_flex_bg_size(sbi); | 488 | int flex_size = ext4_flex_bg_size(sbi); |
492 | 489 | ||
490 | ngroups = real_ngroups; | ||
493 | if (flex_size > 1) { | 491 | if (flex_size > 1) { |
494 | ngroups = (ngroups + flex_size - 1) >> | 492 | ngroups = (real_ngroups + flex_size - 1) >> |
495 | sbi->s_log_groups_per_flex; | 493 | sbi->s_log_groups_per_flex; |
496 | parent_group >>= sbi->s_log_groups_per_flex; | 494 | parent_group >>= sbi->s_log_groups_per_flex; |
497 | } | 495 | } |
@@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
543 | */ | 541 | */ |
544 | grp *= flex_size; | 542 | grp *= flex_size; |
545 | for (i = 0; i < flex_size; i++) { | 543 | for (i = 0; i < flex_size; i++) { |
546 | if (grp+i >= sbi->s_groups_count) | 544 | if (grp+i >= real_ngroups) |
547 | break; | 545 | break; |
548 | desc = ext4_get_group_desc(sb, grp+i, NULL); | 546 | desc = ext4_get_group_desc(sb, grp+i, NULL); |
549 | if (desc && ext4_free_inodes_count(sb, desc)) { | 547 | if (desc && ext4_free_inodes_count(sb, desc)) { |
@@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
583 | } | 581 | } |
584 | 582 | ||
585 | fallback: | 583 | fallback: |
586 | ngroups = sbi->s_groups_count; | 584 | ngroups = real_ngroups; |
587 | avefreei = freei / ngroups; | 585 | avefreei = freei / ngroups; |
588 | fallback_retry: | 586 | fallback_retry: |
589 | parent_group = EXT4_I(parent)->i_block_group; | 587 | parent_group = EXT4_I(parent)->i_block_group; |
@@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
613 | ext4_group_t *group, int mode) | 611 | ext4_group_t *group, int mode) |
614 | { | 612 | { |
615 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 613 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
616 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 614 | ext4_group_t i, last, ngroups = ext4_get_groups_count(sb); |
617 | struct ext4_group_desc *desc; | 615 | struct ext4_group_desc *desc; |
618 | ext4_group_t i, last; | ||
619 | int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); | 616 | int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); |
620 | 617 | ||
621 | /* | 618 | /* |
@@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
708 | 705 | ||
709 | /* | 706 | /* |
710 | * claim the inode from the inode bitmap. If the group | 707 | * claim the inode from the inode bitmap. If the group |
711 | * is uninit we need to take the group's sb_bgl_lock | 708 | * is uninit we need to take the group's ext4_group_lock |
712 | * and clear the uninit flag. The inode bitmap update | 709 | * and clear the uninit flag. The inode bitmap update |
713 | * and group desc uninit flag clear should be done | 710 | * and group desc uninit flag clear should be done |
714 | * after holding sb_bgl_lock so that ext4_read_inode_bitmap | 711 | * after holding ext4_group_lock so that ext4_read_inode_bitmap |
715 | * doesn't race with the ext4_claim_inode | 712 | * doesn't race with the ext4_claim_inode |
716 | */ | 713 | */ |
717 | static int ext4_claim_inode(struct super_block *sb, | 714 | static int ext4_claim_inode(struct super_block *sb, |
@@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
722 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 719 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
723 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 720 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
724 | 721 | ||
725 | spin_lock(sb_bgl_lock(sbi, group)); | 722 | ext4_lock_group(sb, group); |
726 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 723 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
727 | /* not a free inode */ | 724 | /* not a free inode */ |
728 | retval = 1; | 725 | retval = 1; |
@@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
731 | ino++; | 728 | ino++; |
732 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 729 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
733 | ino > EXT4_INODES_PER_GROUP(sb)) { | 730 | ino > EXT4_INODES_PER_GROUP(sb)) { |
734 | spin_unlock(sb_bgl_lock(sbi, group)); | 731 | ext4_unlock_group(sb, group); |
735 | ext4_error(sb, __func__, | 732 | ext4_error(sb, __func__, |
736 | "reserved inode or inode > inodes count - " | 733 | "reserved inode or inode > inodes count - " |
737 | "block_group = %u, inode=%lu", group, | 734 | "block_group = %u, inode=%lu", group, |
@@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
780 | } | 777 | } |
781 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 778 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
782 | err_ret: | 779 | err_ret: |
783 | spin_unlock(sb_bgl_lock(sbi, group)); | 780 | ext4_unlock_group(sb, group); |
784 | return retval; | 781 | return retval; |
785 | } | 782 | } |
786 | 783 | ||
@@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
799 | struct super_block *sb; | 796 | struct super_block *sb; |
800 | struct buffer_head *inode_bitmap_bh = NULL; | 797 | struct buffer_head *inode_bitmap_bh = NULL; |
801 | struct buffer_head *group_desc_bh; | 798 | struct buffer_head *group_desc_bh; |
802 | ext4_group_t group = 0; | 799 | ext4_group_t ngroups, group = 0; |
803 | unsigned long ino = 0; | 800 | unsigned long ino = 0; |
804 | struct inode *inode; | 801 | struct inode *inode; |
805 | struct ext4_group_desc *gdp = NULL; | 802 | struct ext4_group_desc *gdp = NULL; |
806 | struct ext4_super_block *es; | ||
807 | struct ext4_inode_info *ei; | 803 | struct ext4_inode_info *ei; |
808 | struct ext4_sb_info *sbi; | 804 | struct ext4_sb_info *sbi; |
809 | int ret2, err = 0; | 805 | int ret2, err = 0; |
@@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
818 | return ERR_PTR(-EPERM); | 814 | return ERR_PTR(-EPERM); |
819 | 815 | ||
820 | sb = dir->i_sb; | 816 | sb = dir->i_sb; |
817 | ngroups = ext4_get_groups_count(sb); | ||
821 | trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, | 818 | trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, |
822 | dir->i_ino, mode); | 819 | dir->i_ino, mode); |
823 | inode = new_inode(sb); | 820 | inode = new_inode(sb); |
824 | if (!inode) | 821 | if (!inode) |
825 | return ERR_PTR(-ENOMEM); | 822 | return ERR_PTR(-ENOMEM); |
826 | ei = EXT4_I(inode); | 823 | ei = EXT4_I(inode); |
827 | |||
828 | sbi = EXT4_SB(sb); | 824 | sbi = EXT4_SB(sb); |
829 | es = sbi->s_es; | ||
830 | 825 | ||
831 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { | 826 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { |
832 | ret2 = find_group_flex(sb, dir, &group); | 827 | ret2 = find_group_flex(sb, dir, &group); |
@@ -856,7 +851,7 @@ got_group: | |||
856 | if (ret2 == -1) | 851 | if (ret2 == -1) |
857 | goto out; | 852 | goto out; |
858 | 853 | ||
859 | for (i = 0; i < sbi->s_groups_count; i++) { | 854 | for (i = 0; i < ngroups; i++) { |
860 | err = -EIO; | 855 | err = -EIO; |
861 | 856 | ||
862 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | 857 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); |
@@ -917,7 +912,7 @@ repeat_in_this_group: | |||
917 | * group descriptor metadata has not yet been updated. | 912 | * group descriptor metadata has not yet been updated. |
918 | * So we just go onto the next blockgroup. | 913 | * So we just go onto the next blockgroup. |
919 | */ | 914 | */ |
920 | if (++group == sbi->s_groups_count) | 915 | if (++group == ngroups) |
921 | group = 0; | 916 | group = 0; |
922 | } | 917 | } |
923 | err = -ENOSPC; | 918 | err = -ENOSPC; |
@@ -938,7 +933,7 @@ got: | |||
938 | } | 933 | } |
939 | 934 | ||
940 | free = 0; | 935 | free = 0; |
941 | spin_lock(sb_bgl_lock(sbi, group)); | 936 | ext4_lock_group(sb, group); |
942 | /* recheck and clear flag under lock if we still need to */ | 937 | /* recheck and clear flag under lock if we still need to */ |
943 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 938 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
944 | free = ext4_free_blocks_after_init(sb, group, gdp); | 939 | free = ext4_free_blocks_after_init(sb, group, gdp); |
@@ -947,7 +942,7 @@ got: | |||
947 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, | 942 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, |
948 | gdp); | 943 | gdp); |
949 | } | 944 | } |
950 | spin_unlock(sb_bgl_lock(sbi, group)); | 945 | ext4_unlock_group(sb, group); |
951 | 946 | ||
952 | /* Don't need to dirty bitmap block if we didn't change it */ | 947 | /* Don't need to dirty bitmap block if we didn't change it */ |
953 | if (free) { | 948 | if (free) { |
@@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1158 | { | 1153 | { |
1159 | unsigned long desc_count; | 1154 | unsigned long desc_count; |
1160 | struct ext4_group_desc *gdp; | 1155 | struct ext4_group_desc *gdp; |
1161 | ext4_group_t i; | 1156 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
1162 | #ifdef EXT4FS_DEBUG | 1157 | #ifdef EXT4FS_DEBUG |
1163 | struct ext4_super_block *es; | 1158 | struct ext4_super_block *es; |
1164 | unsigned long bitmap_count, x; | 1159 | unsigned long bitmap_count, x; |
@@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1168 | desc_count = 0; | 1163 | desc_count = 0; |
1169 | bitmap_count = 0; | 1164 | bitmap_count = 0; |
1170 | gdp = NULL; | 1165 | gdp = NULL; |
1171 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1166 | for (i = 0; i < ngroups; i++) { |
1172 | gdp = ext4_get_group_desc(sb, i, NULL); | 1167 | gdp = ext4_get_group_desc(sb, i, NULL); |
1173 | if (!gdp) | 1168 | if (!gdp) |
1174 | continue; | 1169 | continue; |
@@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1190 | return desc_count; | 1185 | return desc_count; |
1191 | #else | 1186 | #else |
1192 | desc_count = 0; | 1187 | desc_count = 0; |
1193 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1188 | for (i = 0; i < ngroups; i++) { |
1194 | gdp = ext4_get_group_desc(sb, i, NULL); | 1189 | gdp = ext4_get_group_desc(sb, i, NULL); |
1195 | if (!gdp) | 1190 | if (!gdp) |
1196 | continue; | 1191 | continue; |
@@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1205 | unsigned long ext4_count_dirs(struct super_block * sb) | 1200 | unsigned long ext4_count_dirs(struct super_block * sb) |
1206 | { | 1201 | { |
1207 | unsigned long count = 0; | 1202 | unsigned long count = 0; |
1208 | ext4_group_t i; | 1203 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
1209 | 1204 | ||
1210 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1205 | for (i = 0; i < ngroups; i++) { |
1211 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); | 1206 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
1212 | if (!gdp) | 1207 | if (!gdp) |
1213 | continue; | 1208 | continue; |
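Throughout ialloc.c, raw reads of sbi->s_groups_count are replaced with one ext4_get_groups_count() snapshot per operation, so loops stay stable even if online resize grows the filesystem mid-walk. Presumably the helper is little more than a barriered read of the counter; a userspace analogue with C11 atomics:

	#include <stdatomic.h>
	#include <stdio.h>

	static _Atomic unsigned int s_groups_count = 8;

	/* One coherent snapshot; callers iterate over the local copy
	 * even if a concurrent resize bumps the shared counter. */
	static unsigned int get_groups_count(void)
	{
		return atomic_load_explicit(&s_groups_count,
					    memory_order_acquire);
	}

	int main(void)
	{
		unsigned int ngroups = get_groups_count();

		for (unsigned int i = 0; i < ngroups; i++)
			printf("group %u\n", i);
		return 0;
	}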
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a9ffd528dd1..875db944b22f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode, | |||
372 | } | 372 | } |
373 | 373 | ||
374 | static int __ext4_check_blockref(const char *function, struct inode *inode, | 374 | static int __ext4_check_blockref(const char *function, struct inode *inode, |
375 | __le32 *p, unsigned int max) { | 375 | __le32 *p, unsigned int max) |
376 | 376 | { | |
377 | unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); | ||
378 | __le32 *bref = p; | 377 | __le32 *bref = p; |
378 | unsigned int blk; | ||
379 | |||
379 | while (bref < p+max) { | 380 | while (bref < p+max) { |
380 | if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { | 381 | blk = le32_to_cpu(*bref++); |
382 | if (blk && | ||
383 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
384 | blk, 1))) { | ||
381 | ext4_error(inode->i_sb, function, | 385 | ext4_error(inode->i_sb, function, |
382 | "block reference %u >= max (%u) " | 386 | "invalid block reference %u " |
383 | "in inode #%lu, offset=%d", | 387 | "in inode #%lu", blk, inode->i_ino); |
384 | le32_to_cpu(*bref), maxblocks, | ||
385 | inode->i_ino, (int)(bref-p)); | ||
386 | return -EIO; | 388 | return -EIO; |
387 | } | 389 | } |
388 | bref++; | ||
389 | } | 390 | } |
390 | return 0; | 391 | return 0; |
391 | } | 392 | } |
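__ext4_check_blockref() now skips zero entries -- a 0 in an indirect block is a hole, not a reference -- and defers the range test to ext4_data_block_valid(), which also catches references into metadata, not just references past the end of the filesystem. The shape of the loop, modeled standalone (valid() stands in for the real helper):

	#include <stdio.h>

	typedef unsigned int le32;          /* stand-in for __le32 */

	static int valid(unsigned int blk)  /* stand-in for the validity check */
	{
		return blk >= 66 && blk < (1u << 20);
	}

	static int check_blockrefs(const le32 *p, unsigned int max)
	{
		for (const le32 *bref = p; bref < p + max; bref++) {
			unsigned int blk = *bref;  /* kernel: le32_to_cpu() */

			if (blk && !valid(blk))
				return -5;         /* -EIO: bad reference */
		}
		return 0;                          /* holes (blk == 0) pass */
	}

	int main(void)
	{
		le32 refs[] = { 0, 100, 0, 4096 };
		printf("%d\n", check_blockrefs(refs, 4));  /* 0 */
		return 0;
	}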
@@ -892,6 +893,10 @@ err_out: | |||
892 | } | 893 | } |
893 | 894 | ||
894 | /* | 895 | /* |
896 | * The ext4_ind_get_blocks() function handles non-extents inodes | ||
897 | * (i.e., using the traditional indirect/double-indirect i_blocks | ||
898 | * scheme) for ext4_get_blocks(). | ||
899 | * | ||
895 | * Allocation strategy is simple: if we have to allocate something, we will | 900 | * Allocation strategy is simple: if we have to allocate something, we will |
896 | * have to go the whole way to leaf. So let's do it before attaching anything | 901 | * have to go the whole way to leaf. So let's do it before attaching anything |
897 | * to tree, set linkage between the newborn blocks, write them if sync is | 902 | * to tree, set linkage between the newborn blocks, write them if sync is |
@@ -909,15 +914,16 @@ err_out: | |||
909 | * return = 0, if plain lookup failed. | 914 | * return = 0, if plain lookup failed. |
910 | * return < 0, error case. | 915 | * return < 0, error case. |
911 | * | 916 | * |
912 | * | 917 | * The ext4_ind_get_blocks() function should be called with |
913 | * Need to be called with | 918 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem |
914 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block | 919 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or |
915 | * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) | 920 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system |
921 | * blocks. | ||
916 | */ | 922 | */ |
917 | static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 923 | static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, |
918 | ext4_lblk_t iblock, unsigned int maxblocks, | 924 | ext4_lblk_t iblock, unsigned int maxblocks, |
919 | struct buffer_head *bh_result, | 925 | struct buffer_head *bh_result, |
920 | int create, int extend_disksize) | 926 | int flags) |
921 | { | 927 | { |
922 | int err = -EIO; | 928 | int err = -EIO; |
923 | ext4_lblk_t offsets[4]; | 929 | ext4_lblk_t offsets[4]; |
@@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
927 | int indirect_blks; | 933 | int indirect_blks; |
928 | int blocks_to_boundary = 0; | 934 | int blocks_to_boundary = 0; |
929 | int depth; | 935 | int depth; |
930 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
931 | int count = 0; | 936 | int count = 0; |
932 | ext4_fsblk_t first_block = 0; | 937 | ext4_fsblk_t first_block = 0; |
933 | loff_t disksize; | ||
934 | |||
935 | 938 | ||
936 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 939 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); |
937 | J_ASSERT(handle != NULL || create == 0); | 940 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
938 | depth = ext4_block_to_path(inode, iblock, offsets, | 941 | depth = ext4_block_to_path(inode, iblock, offsets, |
939 | &blocks_to_boundary); | 942 | &blocks_to_boundary); |
940 | 943 | ||
@@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
963 | } | 966 | } |
964 | 967 | ||
965 | /* Next simple case - plain lookup or failed read of indirect block */ | 968 | /* Next simple case - plain lookup or failed read of indirect block */ |
966 | if (!create || err == -EIO) | 969 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) |
967 | goto cleanup; | 970 | goto cleanup; |
968 | 971 | ||
969 | /* | 972 | /* |
@@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
997 | if (!err) | 1000 | if (!err) |
998 | err = ext4_splice_branch(handle, inode, iblock, | 1001 | err = ext4_splice_branch(handle, inode, iblock, |
999 | partial, indirect_blks, count); | 1002 | partial, indirect_blks, count); |
1000 | /* | 1003 | else |
1001 | * i_disksize growing is protected by i_data_sem. Don't forget to | ||
1002 | * protect it if you're about to implement concurrent | ||
1003 | * ext4_get_block() -bzzz | ||
1004 | */ | ||
1005 | if (!err && extend_disksize) { | ||
1006 | disksize = ((loff_t) iblock + count) << inode->i_blkbits; | ||
1007 | if (disksize > i_size_read(inode)) | ||
1008 | disksize = i_size_read(inode); | ||
1009 | if (disksize > ei->i_disksize) | ||
1010 | ei->i_disksize = disksize; | ||
1011 | } | ||
1012 | if (err) | ||
1013 | goto cleanup; | 1004 | goto cleanup; |
1014 | 1005 | ||
1015 | set_buffer_new(bh_result); | 1006 | set_buffer_new(bh_result); |
@@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1120 | ext4_discard_preallocations(inode); | 1111 | ext4_discard_preallocations(inode); |
1121 | } | 1112 | } |
1122 | 1113 | ||
1114 | static int check_block_validity(struct inode *inode, sector_t logical, | ||
1115 | sector_t phys, int len) | ||
1116 | { | ||
1117 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | ||
1118 | ext4_error(inode->i_sb, "check_block_validity", | ||
1119 | "inode #%lu logical block %llu mapped to %llu " | ||
1120 | "(size %d)", inode->i_ino, | ||
1121 | (unsigned long long) logical, | ||
1122 | (unsigned long long) phys, len); | ||
1123 | WARN_ON(1); | ||
1124 | return -EIO; | ||
1125 | } | ||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
1123 | /* | 1129 | /* |
1124 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1130 | * The ext4_get_blocks() function tries to look up the requested blocks, |
1125 | * and returns if the blocks are already mapped. | 1131 | * and returns if the blocks are already mapped. |
1126 | * | 1132 | * |
1127 | * Otherwise it takes the write lock of the i_data_sem and allocates blocks | 1133 |
@@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1129 | * mapped. | 1135 | * mapped. |
1130 | * | 1136 | * |
1131 | * If file type is extents based, it will call ext4_ext_get_blocks(), | 1137 | * If file type is extents based, it will call ext4_ext_get_blocks(), |
1132 | * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping | 1138 | * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping |
1133 | * based files | 1139 | * based files |
1134 | * | 1140 | * |
1135 | * On success, it returns the number of blocks being mapped or allocated. | 1141 |
@@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1142 | * | 1148 | * |
1143 | * It returns the error in case of allocation failure. | 1149 | * It returns the error in case of allocation failure. |
1144 | */ | 1150 | */ |
1145 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | 1151 | int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, |
1146 | unsigned int max_blocks, struct buffer_head *bh, | 1152 | unsigned int max_blocks, struct buffer_head *bh, |
1147 | int create, int extend_disksize, int flag) | 1153 | int flags) |
1148 | { | 1154 | { |
1149 | int retval; | 1155 | int retval; |
1150 | 1156 | ||
@@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1152 | clear_buffer_unwritten(bh); | 1158 | clear_buffer_unwritten(bh); |
1153 | 1159 | ||
1154 | /* | 1160 | /* |
1155 | * Try to see if we can get the block without requesting | 1161 | * Try to see if we can get the block without requesting a new |
1156 | * for new file system block. | 1162 | * file system block. |
1157 | */ | 1163 | */ |
1158 | down_read((&EXT4_I(inode)->i_data_sem)); | 1164 | down_read((&EXT4_I(inode)->i_data_sem)); |
1159 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1165 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
1160 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1166 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, |
1161 | bh, 0, 0); | 1167 | bh, 0); |
1162 | } else { | 1168 | } else { |
1163 | retval = ext4_get_blocks_handle(handle, | 1169 | retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, |
1164 | inode, block, max_blocks, bh, 0, 0); | 1170 | bh, 0); |
1165 | } | 1171 | } |
1166 | up_read((&EXT4_I(inode)->i_data_sem)); | 1172 | up_read((&EXT4_I(inode)->i_data_sem)); |
1167 | 1173 | ||
1174 | if (retval > 0 && buffer_mapped(bh)) { | ||
1175 | int ret = check_block_validity(inode, block, | ||
1176 | bh->b_blocknr, retval); | ||
1177 | if (ret != 0) | ||
1178 | return ret; | ||
1179 | } | ||
1180 | |||
1168 | /* If it is only a block(s) lookup */ | 1181 | /* If it is only a block(s) lookup */ |
1169 | if (!create) | 1182 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) |
1170 | return retval; | 1183 | return retval; |
1171 | 1184 | ||
1172 | /* | 1185 | /* |
@@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1205 | * let the underlying get_block() function know to | 1218 | * let the underlying get_block() function know to |
1206 | * avoid double accounting | 1219 | * avoid double accounting |
1207 | */ | 1220 | */ |
1208 | if (flag) | 1221 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1209 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; | 1222 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; |
1210 | /* | 1223 | /* |
1211 | * We need to check for EXT4 here because migrate | 1224 | * We need to check for EXT4 here because migrate |
@@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1213 | */ | 1226 | */ |
1214 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1227 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
1215 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1228 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, |
1216 | bh, create, extend_disksize); | 1229 | bh, flags); |
1217 | } else { | 1230 | } else { |
1218 | retval = ext4_get_blocks_handle(handle, inode, block, | 1231 | retval = ext4_ind_get_blocks(handle, inode, block, |
1219 | max_blocks, bh, create, extend_disksize); | 1232 | max_blocks, bh, flags); |
1220 | 1233 | ||
1221 | if (retval > 0 && buffer_new(bh)) { | 1234 | if (retval > 0 && buffer_new(bh)) { |
1222 | /* | 1235 | /* |
@@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1229 | } | 1242 | } |
1230 | } | 1243 | } |
1231 | 1244 | ||
1232 | if (flag) { | 1245 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1233 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1246 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; |
1234 | /* | 1247 | |
1235 | * Update reserved blocks/metadata blocks | 1248 | /* |
1236 | * after successful block allocation | 1249 | * Update reserved blocks/metadata blocks after successful |
1237 | * which were deferred till now | 1250 | * block allocation which had been deferred till now. |
1238 | */ | 1251 | */ |
1239 | if ((retval > 0) && buffer_delay(bh)) | 1252 | if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) |
1240 | ext4_da_update_reserve_space(inode, retval); | 1253 | ext4_da_update_reserve_space(inode, retval); |
1241 | } | ||
1242 | 1254 | ||
1243 | up_write((&EXT4_I(inode)->i_data_sem)); | 1255 | up_write((&EXT4_I(inode)->i_data_sem)); |
1256 | if (retval > 0 && buffer_mapped(bh)) { | ||
1257 | int ret = check_block_validity(inode, block, | ||
1258 | bh->b_blocknr, retval); | ||
1259 | if (ret != 0) | ||
1260 | return ret; | ||
1261 | } | ||
1244 | return retval; | 1262 | return retval; |
1245 | } | 1263 | } |
1246 | 1264 | ||
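The single flags argument above replaces the old create/extend_disksize/flag triple. The bit definitions themselves are not part of this hunk; they presumably sit in fs/ext4/ext4.h along these lines (the exact values are an assumption, only the distinct bits matter):

	#define EXT4_GET_BLOCKS_CREATE			0x0001	/* allocate if unmapped */
	#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0002	/* caller is delalloc writeout */
	#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0004	/* settle reserved-space accounting */

A pure lookup passes flags == 0; the delayed-allocation writeout path ORs the reserve bits together with CREATE, as mpage_da_map_blocks() does further down.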
@@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock, | |||
1268 | started = 1; | 1286 | started = 1; |
1269 | } | 1287 | } |
1270 | 1288 | ||
1271 | ret = ext4_get_blocks_wrap(handle, inode, iblock, | 1289 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, |
1272 | max_blocks, bh_result, create, 0, 0); | 1290 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
1273 | if (ret > 0) { | 1291 | if (ret > 0) { |
1274 | bh_result->b_size = (ret << inode->i_blkbits); | 1292 | bh_result->b_size = (ret << inode->i_blkbits); |
1275 | ret = 0; | 1293 | ret = 0; |
@@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
1288 | { | 1306 | { |
1289 | struct buffer_head dummy; | 1307 | struct buffer_head dummy; |
1290 | int fatal = 0, err; | 1308 | int fatal = 0, err; |
1309 | int flags = 0; | ||
1291 | 1310 | ||
1292 | J_ASSERT(handle != NULL || create == 0); | 1311 | J_ASSERT(handle != NULL || create == 0); |
1293 | 1312 | ||
1294 | dummy.b_state = 0; | 1313 | dummy.b_state = 0; |
1295 | dummy.b_blocknr = -1000; | 1314 | dummy.b_blocknr = -1000; |
1296 | buffer_trace_init(&dummy.b_history); | 1315 | buffer_trace_init(&dummy.b_history); |
1297 | err = ext4_get_blocks_wrap(handle, inode, block, 1, | 1316 | if (create) |
1298 | &dummy, create, 1, 0); | 1317 | flags |= EXT4_GET_BLOCKS_CREATE; |
1318 | err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); | ||
1299 | /* | 1319 | /* |
1300 | * ext4_get_blocks_handle() returns number of blocks | 1320 | * ext4_get_blocks() returns number of blocks mapped. 0 in |
1301 | * mapped. 0 in case of a HOLE. | 1321 | * case of a HOLE. |
1302 | */ | 1322 | */ |
1303 | if (err > 0) { | 1323 | if (err > 0) { |
1304 | if (err > 1) | 1324 | if (err > 1) |
@@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1439 | struct page **pagep, void **fsdata) | 1459 | struct page **pagep, void **fsdata) |
1440 | { | 1460 | { |
1441 | struct inode *inode = mapping->host; | 1461 | struct inode *inode = mapping->host; |
1442 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | 1462 | int ret, needed_blocks; |
1443 | handle_t *handle; | 1463 | handle_t *handle; |
1444 | int retries = 0; | 1464 | int retries = 0; |
1445 | struct page *page; | 1465 | struct page *page; |
@@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1450 | "dev %s ino %lu pos %llu len %u flags %u", | 1470 | "dev %s ino %lu pos %llu len %u flags %u", |
1451 | inode->i_sb->s_id, inode->i_ino, | 1471 | inode->i_sb->s_id, inode->i_ino, |
1452 | (unsigned long long) pos, len, flags); | 1472 | (unsigned long long) pos, len, flags); |
1473 | /* | ||
1474 | * Reserve one more block for addition to the orphan list in | ||
1475 | * case we allocate blocks but the write fails for some reason | ||
1476 | */ | ||
1477 | needed_blocks = ext4_writepage_trans_blocks(inode) + 1; | ||
1453 | index = pos >> PAGE_CACHE_SHIFT; | 1478 | index = pos >> PAGE_CACHE_SHIFT; |
1454 | from = pos & (PAGE_CACHE_SIZE - 1); | 1479 | from = pos & (PAGE_CACHE_SIZE - 1); |
1455 | to = from + len; | 1480 | to = from + len; |
@@ -1483,15 +1508,30 @@ retry: | |||
1483 | 1508 | ||
1484 | if (ret) { | 1509 | if (ret) { |
1485 | unlock_page(page); | 1510 | unlock_page(page); |
1486 | ext4_journal_stop(handle); | ||
1487 | page_cache_release(page); | 1511 | page_cache_release(page); |
1488 | /* | 1512 | /* |
1489 | * block_write_begin may have instantiated a few blocks | 1513 | * block_write_begin may have instantiated a few blocks |
1490 | * outside i_size. Trim these off again. Don't need | 1514 | * outside i_size. Trim these off again. Don't need |
1491 | * i_size_read because we hold i_mutex. | 1515 | * i_size_read because we hold i_mutex. |
1516 | * | ||
1517 | * Add inode to orphan list in case we crash before | ||
1518 | * truncate finishes | ||
1492 | */ | 1519 | */ |
1493 | if (pos + len > inode->i_size) | 1520 | if (pos + len > inode->i_size) |
1521 | ext4_orphan_add(handle, inode); | ||
1522 | |||
1523 | ext4_journal_stop(handle); | ||
1524 | if (pos + len > inode->i_size) { | ||
1494 | vmtruncate(inode, inode->i_size); | 1525 | vmtruncate(inode, inode->i_size); |
1526 | /* | ||
1527 | * If vmtruncate failed early the inode might | ||
1528 | * still be on the orphan list; we need to | ||
1529 | * make sure the inode is removed from the | ||
1530 | * orphan list in that case. | ||
1531 | */ | ||
1532 | if (inode->i_nlink) | ||
1533 | ext4_orphan_del(NULL, inode); | ||
1534 | } | ||
1495 | } | 1535 | } |
1496 | 1536 | ||
1497 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 1537 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) | |||
1509 | return ext4_handle_dirty_metadata(handle, NULL, bh); | 1549 | return ext4_handle_dirty_metadata(handle, NULL, bh); |
1510 | } | 1550 | } |
1511 | 1551 | ||
1552 | static int ext4_generic_write_end(struct file *file, | ||
1553 | struct address_space *mapping, | ||
1554 | loff_t pos, unsigned len, unsigned copied, | ||
1555 | struct page *page, void *fsdata) | ||
1556 | { | ||
1557 | int i_size_changed = 0; | ||
1558 | struct inode *inode = mapping->host; | ||
1559 | handle_t *handle = ext4_journal_current_handle(); | ||
1560 | |||
1561 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | ||
1562 | |||
1563 | /* | ||
1564 | * No need to use i_size_read() here, the i_size | ||
1565 | * cannot change under us because we hold i_mutex. | ||
1566 | * | ||
1567 | * But it's important to update i_size while still holding page lock: | ||
1568 | * page writeout could otherwise come in and zero beyond i_size. | ||
1569 | */ | ||
1570 | if (pos + copied > inode->i_size) { | ||
1571 | i_size_write(inode, pos + copied); | ||
1572 | i_size_changed = 1; | ||
1573 | } | ||
1574 | |||
1575 | if (pos + copied > EXT4_I(inode)->i_disksize) { | ||
1576 | /* We need to mark inode dirty even if | ||
1577 | * new_i_size is less than inode->i_size | ||
1578 | * but greater than i_disksize (hint: delalloc) | ||
1579 | */ | ||
1580 | ext4_update_i_disksize(inode, (pos + copied)); | ||
1581 | i_size_changed = 1; | ||
1582 | } | ||
1583 | unlock_page(page); | ||
1584 | page_cache_release(page); | ||
1585 | |||
1586 | /* | ||
1587 | * Don't mark the inode dirty under page lock. First, it unnecessarily | ||
1588 | * makes the holding time of page lock longer. Second, it forces lock | ||
1589 | * ordering of page lock and transaction start for journaling | ||
1590 | * filesystems. | ||
1591 | */ | ||
1592 | if (i_size_changed) | ||
1593 | ext4_mark_inode_dirty(handle, inode); | ||
1594 | |||
1595 | return copied; | ||
1596 | } | ||
1597 | |||
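ext4_generic_write_end() keeps the on-disk size in step through ext4_update_i_disksize(), paraphrased below from its ext4.h definition (details may vary by tree):

	static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
	{
		/* i_disksize only ever grows here, under the allocation
		 * semaphore, so a racing writer cannot shrink it */
		down_write(&EXT4_I(inode)->i_data_sem);
		if (newsize > EXT4_I(inode)->i_disksize)
			EXT4_I(inode)->i_disksize = newsize;
		up_write(&EXT4_I(inode)->i_data_sem);
	}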
1512 | /* | 1598 | /* |
1513 | * We need to pick up the new inode size which generic_commit_write gave us | 1599 | * We need to pick up the new inode size which generic_commit_write gave us |
1514 | * `file' can be NULL - eg, when called from page_symlink(). | 1600 | * `file' can be NULL - eg, when called from page_symlink(). |
@@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file, | |||
1532 | ret = ext4_jbd2_file_inode(handle, inode); | 1618 | ret = ext4_jbd2_file_inode(handle, inode); |
1533 | 1619 | ||
1534 | if (ret == 0) { | 1620 | if (ret == 0) { |
1535 | loff_t new_i_size; | 1621 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1536 | |||
1537 | new_i_size = pos + copied; | ||
1538 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
1539 | ext4_update_i_disksize(inode, new_i_size); | ||
1540 | /* We need to mark inode dirty even if | ||
1541 | * new_i_size is less that inode->i_size | ||
1542 | * bu greater than i_disksize.(hint delalloc) | ||
1543 | */ | ||
1544 | ext4_mark_inode_dirty(handle, inode); | ||
1545 | } | ||
1546 | |||
1547 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
1548 | page, fsdata); | 1622 | page, fsdata); |
1549 | copied = ret2; | 1623 | copied = ret2; |
1624 | if (pos + len > inode->i_size) | ||
1625 | /* if we have allocated more blocks and copied | ||
1626 | * less, we will have blocks allocated outside | ||
1627 | * inode->i_size, so truncate them | ||
1628 | */ | ||
1629 | ext4_orphan_add(handle, inode); | ||
1550 | if (ret2 < 0) | 1630 | if (ret2 < 0) |
1551 | ret = ret2; | 1631 | ret = ret2; |
1552 | } | 1632 | } |
@@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file, | |||
1554 | if (!ret) | 1634 | if (!ret) |
1555 | ret = ret2; | 1635 | ret = ret2; |
1556 | 1636 | ||
1637 | if (pos + len > inode->i_size) { | ||
1638 | vmtruncate(inode, inode->i_size); | ||
1639 | /* | ||
1640 | * If vmtruncate failed early the inode might still be | ||
1641 | * on the orphan list; we need to make sure the inode | ||
1642 | * is removed from the orphan list in that case. | ||
1643 | */ | ||
1644 | if (inode->i_nlink) | ||
1645 | ext4_orphan_del(NULL, inode); | ||
1646 | } | ||
1647 | |||
1648 | |||
1557 | return ret ? ret : copied; | 1649 | return ret ? ret : copied; |
1558 | } | 1650 | } |
1559 | 1651 | ||
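The same recovery pattern is open-coded in ext4_write_begin() and all three write_end variants: put the inode on the orphan list while the handle is live, then, once the handle is closed, truncate the stray blocks and take the inode back off the list if it is still linked. A hypothetical helper capturing the post-handle half (purely a sketch of the shared pattern, not code from this patch):

	static void ext4_truncate_failed_write_sketch(struct inode *inode)
	{
		/* free blocks instantiated beyond i_size by the failed write */
		vmtruncate(inode, inode->i_size);
		/*
		 * If vmtruncate() bailed out early the inode may still be
		 * on the orphan list; drop it so the list holds only
		 * inodes that genuinely need recovery work.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}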
@@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file, | |||
1565 | handle_t *handle = ext4_journal_current_handle(); | 1657 | handle_t *handle = ext4_journal_current_handle(); |
1566 | struct inode *inode = mapping->host; | 1658 | struct inode *inode = mapping->host; |
1567 | int ret = 0, ret2; | 1659 | int ret = 0, ret2; |
1568 | loff_t new_i_size; | ||
1569 | 1660 | ||
1570 | trace_mark(ext4_writeback_write_end, | 1661 | trace_mark(ext4_writeback_write_end, |
1571 | "dev %s ino %lu pos %llu len %u copied %u", | 1662 | "dev %s ino %lu pos %llu len %u copied %u", |
1572 | inode->i_sb->s_id, inode->i_ino, | 1663 | inode->i_sb->s_id, inode->i_ino, |
1573 | (unsigned long long) pos, len, copied); | 1664 | (unsigned long long) pos, len, copied); |
1574 | new_i_size = pos + copied; | 1665 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1575 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
1576 | ext4_update_i_disksize(inode, new_i_size); | ||
1577 | /* We need to mark inode dirty even if | ||
1578 | * new_i_size is less that inode->i_size | ||
1579 | * bu greater than i_disksize.(hint delalloc) | ||
1580 | */ | ||
1581 | ext4_mark_inode_dirty(handle, inode); | ||
1582 | } | ||
1583 | |||
1584 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
1585 | page, fsdata); | 1666 | page, fsdata); |
1586 | copied = ret2; | 1667 | copied = ret2; |
1668 | if (pos + len > inode->i_size) | ||
1669 | /* if we have allocated more blocks and copied | ||
1670 | * less, we will have blocks allocated outside | ||
1671 | * inode->i_size, so truncate them | ||
1672 | */ | ||
1673 | ext4_orphan_add(handle, inode); | ||
1674 | |||
1587 | if (ret2 < 0) | 1675 | if (ret2 < 0) |
1588 | ret = ret2; | 1676 | ret = ret2; |
1589 | 1677 | ||
@@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file, | |||
1591 | if (!ret) | 1679 | if (!ret) |
1592 | ret = ret2; | 1680 | ret = ret2; |
1593 | 1681 | ||
1682 | if (pos + len > inode->i_size) { | ||
1683 | vmtruncate(inode, inode->i_size); | ||
1684 | /* | ||
1685 | * If vmtruncate failed early the inode might still be | ||
1686 | * on the orphan list; we need to make sure the inode | ||
1687 | * is removed from the orphan list in that case. | ||
1688 | */ | ||
1689 | if (inode->i_nlink) | ||
1690 | ext4_orphan_del(NULL, inode); | ||
1691 | } | ||
1692 | |||
1594 | return ret ? ret : copied; | 1693 | return ret ? ret : copied; |
1595 | } | 1694 | } |
1596 | 1695 | ||
@@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file, | |||
1635 | } | 1734 | } |
1636 | 1735 | ||
1637 | unlock_page(page); | 1736 | unlock_page(page); |
1737 | page_cache_release(page); | ||
1738 | if (pos + len > inode->i_size) | ||
1739 | /* if we have allocated more blocks and copied | ||
1740 | * less, we will have blocks allocated outside | ||
1741 | * inode->i_size, so truncate them | ||
1742 | */ | ||
1743 | ext4_orphan_add(handle, inode); | ||
1744 | |||
1638 | ret2 = ext4_journal_stop(handle); | 1745 | ret2 = ext4_journal_stop(handle); |
1639 | if (!ret) | 1746 | if (!ret) |
1640 | ret = ret2; | 1747 | ret = ret2; |
1641 | page_cache_release(page); | 1748 | if (pos + len > inode->i_size) { |
1749 | vmtruncate(inode, inode->i_size); | ||
1750 | /* | ||
1751 | * If vmtruncate failed early the inode might still be | ||
1752 | * on the orphan list; we need to make sure the inode | ||
1753 | * is removed from the orphan list in that case. | ||
1754 | */ | ||
1755 | if (inode->i_nlink) | ||
1756 | ext4_orphan_del(NULL, inode); | ||
1757 | } | ||
1642 | 1758 | ||
1643 | return ret ? ret : copied; | 1759 | return ret ? ret : copied; |
1644 | } | 1760 | } |
@@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1852 | * @logical - first logical block to start assignment with | 1968 | * @logical - first logical block to start assignment with |
1853 | * | 1969 | * |
1854 | * the function goes through all passed space and puts actual disk | 1970 | * the function goes through all passed space and puts actual disk |
1855 | * block numbers into buffer heads, dropping BH_Delay | 1971 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten |
1856 | */ | 1972 | */ |
1857 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | 1973 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, |
1858 | struct buffer_head *exbh) | 1974 | struct buffer_head *exbh) |
@@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1902 | do { | 2018 | do { |
1903 | if (cur_logical >= logical + blocks) | 2019 | if (cur_logical >= logical + blocks) |
1904 | break; | 2020 | break; |
1905 | if (buffer_delay(bh)) { | 2021 | |
1906 | bh->b_blocknr = pblock; | 2022 | if (buffer_delay(bh) || |
1907 | clear_buffer_delay(bh); | 2023 | buffer_unwritten(bh)) { |
1908 | bh->b_bdev = inode->i_sb->s_bdev; | 2024 | |
1909 | } else if (buffer_unwritten(bh)) { | 2025 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); |
1910 | bh->b_blocknr = pblock; | 2026 | |
1911 | clear_buffer_unwritten(bh); | 2027 | if (buffer_delay(bh)) { |
1912 | set_buffer_mapped(bh); | 2028 | clear_buffer_delay(bh); |
1913 | set_buffer_new(bh); | 2029 | bh->b_blocknr = pblock; |
1914 | bh->b_bdev = inode->i_sb->s_bdev; | 2030 | } else { |
2031 | /* | ||
2032 | * an unwritten buffer should already | ||
2033 | * have a blocknr assigned; verify that | ||
2034 | */ | ||
2035 | clear_buffer_unwritten(bh); | ||
2036 | BUG_ON(bh->b_blocknr != pblock); | ||
2037 | } | ||
2038 | |||
1915 | } else if (buffer_mapped(bh)) | 2039 | } else if (buffer_mapped(bh)) |
1916 | BUG_ON(bh->b_blocknr != pblock); | 2040 | BUG_ON(bh->b_blocknr != pblock); |
1917 | 2041 | ||
@@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1990 | return; | 2114 | return; |
1991 | } | 2115 | } |
1992 | 2116 | ||
1993 | #define EXT4_DELALLOC_RSVED 1 | ||
1994 | static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | ||
1995 | struct buffer_head *bh_result, int create) | ||
1996 | { | ||
1997 | int ret; | ||
1998 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
1999 | loff_t disksize = EXT4_I(inode)->i_disksize; | ||
2000 | handle_t *handle = NULL; | ||
2001 | |||
2002 | handle = ext4_journal_current_handle(); | ||
2003 | BUG_ON(!handle); | ||
2004 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
2005 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
2006 | if (ret <= 0) | ||
2007 | return ret; | ||
2008 | |||
2009 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2010 | |||
2011 | if (ext4_should_order_data(inode)) { | ||
2012 | int retval; | ||
2013 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2014 | if (retval) | ||
2015 | /* | ||
2016 | * Failed to add inode for ordered mode. Don't | ||
2017 | * update file size | ||
2018 | */ | ||
2019 | return retval; | ||
2020 | } | ||
2021 | |||
2022 | /* | ||
2023 | * Update on-disk size along with block allocation we don't | ||
2024 | * use 'extend_disksize' as size may change within already | ||
2025 | * allocated block -bzzz | ||
2026 | */ | ||
2027 | disksize = ((loff_t) iblock + ret) << inode->i_blkbits; | ||
2028 | if (disksize > i_size_read(inode)) | ||
2029 | disksize = i_size_read(inode); | ||
2030 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2031 | ext4_update_i_disksize(inode, disksize); | ||
2032 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2033 | return ret; | ||
2034 | } | ||
2035 | return 0; | ||
2036 | } | ||
2037 | |||
2038 | /* | 2117 | /* |
2039 | * mpage_da_map_blocks - go through given space | 2118 | * mpage_da_map_blocks - go through given space |
2040 | * | 2119 | * |
@@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2045 | */ | 2124 | */ |
2046 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2125 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
2047 | { | 2126 | { |
2048 | int err = 0; | 2127 | int err, blks, get_blocks_flags; |
2049 | struct buffer_head new; | 2128 | struct buffer_head new; |
2050 | sector_t next; | 2129 | sector_t next = mpd->b_blocknr; |
2130 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | ||
2131 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | ||
2132 | handle_t *handle = NULL; | ||
2051 | 2133 | ||
2052 | /* | 2134 | /* |
2053 | * We consider only non-mapped and non-allocated blocks | 2135 | * We consider only non-mapped and non-allocated blocks |
2054 | */ | 2136 | */ |
2055 | if ((mpd->b_state & (1 << BH_Mapped)) && | 2137 | if ((mpd->b_state & (1 << BH_Mapped)) && |
2056 | !(mpd->b_state & (1 << BH_Delay))) | 2138 | !(mpd->b_state & (1 << BH_Delay)) && |
2139 | !(mpd->b_state & (1 << BH_Unwritten))) | ||
2057 | return 0; | 2140 | return 0; |
2058 | new.b_state = mpd->b_state; | 2141 | |
2059 | new.b_blocknr = 0; | ||
2060 | new.b_size = mpd->b_size; | ||
2061 | next = mpd->b_blocknr; | ||
2062 | /* | 2142 | /* |
2063 | * If we didn't accumulate anything | 2143 | * If we didn't accumulate anything to write simply return |
2064 | * to write simply return | ||
2065 | */ | 2144 | */ |
2066 | if (!new.b_size) | 2145 | if (!mpd->b_size) |
2067 | return 0; | 2146 | return 0; |
2068 | 2147 | ||
2069 | err = ext4_da_get_block_write(mpd->inode, next, &new, 1); | 2148 | handle = ext4_journal_current_handle(); |
2070 | if (err) { | 2149 | BUG_ON(!handle); |
2150 | |||
2151 | /* | ||
2152 | * Call ext4_get_blocks() to allocate any delayed allocation | ||
2153 | * blocks, or to convert an uninitialized extent to be | ||
2154 | * initialized (in the case where we have written into | ||
2155 | * one or more preallocated blocks). | ||
2156 | * | ||
2157 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | ||
2158 | * indicate that we are on the delayed allocation path. This | ||
2159 | * affects functions in many different parts of the allocation | ||
2160 | * call path. This flag exists primarily because we don't | ||
2161 | * want to change *many* calling functions, so ext4_get_blocks() | ||
2162 | * will set the magic i_delalloc_reserved_flag once the | ||
2163 | * inode's allocation semaphore is taken. | ||
2164 | * | ||
2165 | * If the blocks in question were delalloc blocks, set | ||
2166 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | ||
2167 | * variables are updated after the blocks have been allocated. | ||
2168 | */ | ||
2169 | new.b_state = 0; | ||
2170 | get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | | ||
2171 | EXT4_GET_BLOCKS_DELALLOC_RESERVE); | ||
2172 | if (mpd->b_state & (1 << BH_Delay)) | ||
2173 | get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; | ||
2174 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, | ||
2175 | &new, get_blocks_flags); | ||
2176 | if (blks < 0) { | ||
2177 | err = blks; | ||
2071 | /* | 2178 | /* |
2072 | * If get block returns with error we simply | 2179 | * If get block returns with error we simply |
2073 | * return. Later writepage will redirty the page and | 2180 | * return. Later writepage will redirty the page and |
@@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2100 | if (err == -ENOSPC) { | 2207 | if (err == -ENOSPC) { |
2101 | ext4_print_free_blocks(mpd->inode); | 2208 | ext4_print_free_blocks(mpd->inode); |
2102 | } | 2209 | } |
2103 | /* invlaidate all the pages */ | 2210 | /* invalidate all the pages */ |
2104 | ext4_da_block_invalidatepages(mpd, next, | 2211 | ext4_da_block_invalidatepages(mpd, next, |
2105 | mpd->b_size >> mpd->inode->i_blkbits); | 2212 | mpd->b_size >> mpd->inode->i_blkbits); |
2106 | return err; | 2213 | return err; |
2107 | } | 2214 | } |
2108 | BUG_ON(new.b_size == 0); | 2215 | BUG_ON(blks == 0); |
2216 | |||
2217 | new.b_size = (blks << mpd->inode->i_blkbits); | ||
2109 | 2218 | ||
2110 | if (buffer_new(&new)) | 2219 | if (buffer_new(&new)) |
2111 | __unmap_underlying_blocks(mpd->inode, &new); | 2220 | __unmap_underlying_blocks(mpd->inode, &new); |
@@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2118 | (mpd->b_state & (1 << BH_Unwritten))) | 2227 | (mpd->b_state & (1 << BH_Unwritten))) |
2119 | mpage_put_bnr_to_bhs(mpd, next, &new); | 2228 | mpage_put_bnr_to_bhs(mpd, next, &new); |
2120 | 2229 | ||
2230 | if (ext4_should_order_data(mpd->inode)) { | ||
2231 | err = ext4_jbd2_file_inode(handle, mpd->inode); | ||
2232 | if (err) | ||
2233 | return err; | ||
2234 | } | ||
2235 | |||
2236 | /* | ||
2237 | * Update on-disk size along with block allocation. | ||
2238 | */ | ||
2239 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | ||
2240 | if (disksize > i_size_read(mpd->inode)) | ||
2241 | disksize = i_size_read(mpd->inode); | ||
2242 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | ||
2243 | ext4_update_i_disksize(mpd->inode, disksize); | ||
2244 | return ext4_mark_inode_dirty(handle, mpd->inode); | ||
2245 | } | ||
2246 | |||
2121 | return 0; | 2247 | return 0; |
2122 | } | 2248 | } |
2123 | 2249 | ||
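Note the unit change across the allocator call: mpd->b_size accumulates the extent length in bytes, ext4_get_blocks() works in blocks, and the result is converted back before mpage_put_bnr_to_bhs() consumes it. A quick worked example, assuming 4 KiB blocks (i_blkbits == 12):

	max_blocks = mpd->b_size >> 12;	/* 1 MiB accumulated -> 256 blocks */
	blks = 64;			/* suppose the allocator mapped 64 */
	new.b_size = blks << 12;	/* 256 KiB of buffers receive real
					   block numbers on this pass */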
@@ -2192,6 +2318,17 @@ flush_it: | |||
2192 | return; | 2318 | return; |
2193 | } | 2319 | } |
2194 | 2320 | ||
2321 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||
2322 | { | ||
2323 | /* | ||
2324 | * An unmapped buffer is possible for holes. | ||
2325 | * A delayed buffer is possible with delayed allocation. | ||
2326 | * We also need to treat an unwritten buffer as unmapped. | ||
2327 | */ | ||
2328 | return (!buffer_mapped(bh) || buffer_delay(bh) || | ||
2329 | buffer_unwritten(bh)) && buffer_dirty(bh); | ||
2330 | } | ||
2331 | |||
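The otherwise-unused handle argument lets ext4_bh_unmapped_or_delay() double as a walk_page_buffers() callback. A hedged sketch of the intended use in the writepage path (the exact call site is outside this hunk):

	/* nonzero if any dirty buffer on the page still lacks a real block */
	if (walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE,
			      NULL, ext4_bh_unmapped_or_delay)) {
		/* allocation still pending: redirty and let the delalloc
		 * writeout path retry with a journal handle */
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}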
2195 | /* | 2332 | /* |
2196 | * __mpage_da_writepage - finds extent of pages and blocks | 2333 | * __mpage_da_writepage - finds extent of pages and blocks |
2197 | * | 2334 | * |
@@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page, | |||
2276 | * Otherwise we won't make progress | 2413 | * Otherwise we won't make progress |
2277 | * with the page in ext4_da_writepage | 2414 | * with the page in ext4_da_writepage |
2278 | */ | 2415 | */ |
2279 | if (buffer_dirty(bh) && | 2416 | if (ext4_bh_unmapped_or_delay(NULL, bh)) { |
2280 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
2281 | mpage_add_bh_to_extent(mpd, logical, | 2417 | mpage_add_bh_to_extent(mpd, logical, |
2282 | bh->b_size, | 2418 | bh->b_size, |
2283 | bh->b_state); | 2419 | bh->b_state); |
@@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page, | |||
2303 | } | 2439 | } |
2304 | 2440 | ||
2305 | /* | 2441 | /* |
2306 | * this is a special callback for ->write_begin() only | 2442 | * This is a special get_blocks_t callback which is used by |
2307 | * it's intention is to return mapped block or reserve space | 2443 | * ext4_da_write_begin(). It will either return a mapped block or |
2444 | * reserve space for a single block. | ||
2445 | * | ||
2446 | * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. | ||
2447 | * We also have b_blocknr = -1 and b_bdev initialized properly | ||
2448 | * | ||
2449 | * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. | ||
2450 | * We also have b_blocknr = the physical block mapping the unwritten extent and b_bdev | ||
2451 | * initialized properly. | ||
2308 | */ | 2452 | */ |
2309 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2453 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2310 | struct buffer_head *bh_result, int create) | 2454 | struct buffer_head *bh_result, int create) |
@@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2323 | * preallocated blocks are unmapped but should treated | 2467 | * preallocated blocks are unmapped but should treated |
2324 | * the same as allocated blocks. | 2468 | * the same as allocated blocks. |
2325 | */ | 2469 | */ |
2326 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); | 2470 | ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); |
2327 | if ((ret == 0) && !buffer_delay(bh_result)) { | 2471 | if ((ret == 0) && !buffer_delay(bh_result)) { |
2328 | /* the block isn't (pre)allocated yet, let's reserve space */ | 2472 | /* the block isn't (pre)allocated yet, let's reserve space */ |
2329 | /* | 2473 | /* |
@@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2340 | set_buffer_delay(bh_result); | 2484 | set_buffer_delay(bh_result); |
2341 | } else if (ret > 0) { | 2485 | } else if (ret > 0) { |
2342 | bh_result->b_size = (ret << inode->i_blkbits); | 2486 | bh_result->b_size = (ret << inode->i_blkbits); |
2343 | /* | 2487 | if (buffer_unwritten(bh_result)) { |
2344 | * With sub-block writes into unwritten extents | 2488 | /* A delayed write to unwritten bh should |
2345 | * we also need to mark the buffer as new so that | 2489 | * be marked new and mapped. Mapped ensures |
2346 | * the unwritten parts of the buffer gets correctly zeroed. | 2490 | * that we don't do get_block multiple times |
2347 | */ | 2491 | * when we write to the same offset and new |
2348 | if (buffer_unwritten(bh_result)) | 2492 | * ensures that we do proper zero out for |
2493 | * partial write. | ||
2494 | */ | ||
2349 | set_buffer_new(bh_result); | 2495 | set_buffer_new(bh_result); |
2496 | set_buffer_mapped(bh_result); | ||
2497 | } | ||
2350 | ret = 0; | 2498 | ret = 0; |
2351 | } | 2499 | } |
2352 | 2500 | ||
2353 | return ret; | 2501 | return ret; |
2354 | } | 2502 | } |
2355 | 2503 | ||
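Condensed from the comment above, the two buffer_head shapes ext4_da_get_block_prep() can hand back look roughly like this (illustrative fragment, not patch code; pblock stands for a hypothetical physical block number):

	/* delayed: space reserved, no physical block chosen yet */
	bh->b_bdev = inode->i_sb->s_bdev;
	bh->b_blocknr = -1;
	set_buffer_new(bh);
	set_buffer_mapped(bh);
	set_buffer_delay(bh);

	/* unwritten: preallocated extent; the physical block is known
	 * but its on-disk contents are not initialized yet */
	bh->b_bdev = inode->i_sb->s_bdev;
	bh->b_blocknr = pblock;
	set_buffer_new(bh);
	set_buffer_mapped(bh);
	set_buffer_unwritten(bh);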
2356 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | 2504 | /* |
2357 | { | 2505 | * This function is used as a standard get_block_t callback function |
2358 | /* | 2506 | * when there is no desire to allocate any blocks. It is used as a |
2359 | * unmapped buffer is possible for holes. | 2507 | * callback function for block_prepare_write(), nobh_writepage(), and |
2360 | * delay buffer is possible with delayed allocation | 2508 | * block_write_full_page(). These functions should only try to map a |
2361 | */ | 2509 | * single block at a time. |
2362 | return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); | 2510 | * |
2363 | } | 2511 | * Since this function doesn't do block allocations even if the caller |
2364 | 2512 | * requests it by passing in create=1, it is critically important that | |
2365 | static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | 2513 | * any caller checks to make sure that any buffer heads returned |
2514 | * by this function are either all already mapped or marked for | ||
2515 | * delayed allocation before calling nobh_writepage() or | ||
2516 | * block_write_full_page(). Otherwise, b_blocknr could be left | ||
2517 | * uninitialized, and the page write functions will be taken by | ||
2518 | * surprise. | ||
2519 | */ | ||
2520 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
2366 | struct buffer_head *bh_result, int create) | 2521 | struct buffer_head *bh_result, int create) |
2367 | { | 2522 | { |
2368 | int ret = 0; | 2523 | int ret = 0; |
2369 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 2524 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
2370 | 2525 | ||
2526 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | ||
2527 | |||
2371 | /* | 2528 | /* |
2372 | * we don't want to do block allocation in writepage | 2529 | * we don't want to do block allocation in writepage |
2373 | * so call get_block_wrap with create = 0 | 2530 | * so call get_block_wrap with create = 0 |
2374 | */ | 2531 | */ |
2375 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, | 2532 | ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); |
2376 | bh_result, 0, 0, 0); | 2533 | BUG_ON(create && ret == 0); |
2377 | if (ret > 0) { | 2534 | if (ret > 0) { |
2378 | bh_result->b_size = (ret << inode->i_blkbits); | 2535 | bh_result->b_size = (ret << inode->i_blkbits); |
2379 | ret = 0; | 2536 | ret = 0; |
@@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | |||
2382 | } | 2539 | } |
2383 | 2540 | ||
2384 | /* | 2541 | /* |
2385 | * get called vi ext4_da_writepages after taking page lock (have journal handle) | 2542 | * This function can get called via... |
2386 | * get called via journal_submit_inode_data_buffers (no journal handle) | 2543 | * - ext4_da_writepages after taking page lock (have journal handle) |
2387 | * get called via shrink_page_list via pdflush (no journal handle) | 2544 | * - journal_submit_inode_data_buffers (no journal handle) |
2388 | * or grab_page_cache when doing write_begin (have journal handle) | 2545 | * - shrink_page_list via pdflush (no journal handle) |
2546 | * - grab_page_cache when doing write_begin (have journal handle) | ||
2389 | */ | 2547 | */ |
2390 | static int ext4_da_writepage(struct page *page, | 2548 | static int ext4_da_writepage(struct page *page, |
2391 | struct writeback_control *wbc) | 2549 | struct writeback_control *wbc) |
@@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page, | |||
2436 | * do block allocation here. | 2594 | * do block allocation here. |
2437 | */ | 2595 | */ |
2438 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 2596 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
2439 | ext4_normal_get_block_write); | 2597 | noalloc_get_block_write); |
2440 | if (!ret) { | 2598 | if (!ret) { |
2441 | page_bufs = page_buffers(page); | 2599 | page_bufs = page_buffers(page); |
2442 | /* check whether all are mapped and non delay */ | 2600 | /* check whether all are mapped and non delay */ |
@@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page, | |||
2461 | } | 2619 | } |
2462 | 2620 | ||
2463 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2621 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) |
2464 | ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); | 2622 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); |
2465 | else | 2623 | else |
2466 | ret = block_write_full_page(page, | 2624 | ret = block_write_full_page(page, noalloc_get_block_write, |
2467 | ext4_normal_get_block_write, | 2625 | wbc); |
2468 | wbc); | ||
2469 | 2626 | ||
2470 | return ret; | 2627 | return ret; |
2471 | } | 2628 | } |
@@ -2777,7 +2934,7 @@ retry: | |||
2777 | *pagep = page; | 2934 | *pagep = page; |
2778 | 2935 | ||
2779 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 2936 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
2780 | ext4_da_get_block_prep); | 2937 | ext4_da_get_block_prep); |
2781 | if (ret < 0) { | 2938 | if (ret < 0) { |
2782 | unlock_page(page); | 2939 | unlock_page(page); |
2783 | ext4_journal_stop(handle); | 2940 | ext4_journal_stop(handle); |
@@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2815 | for (i = 0; i < idx; i++) | 2972 | for (i = 0; i < idx; i++) |
2816 | bh = bh->b_this_page; | 2973 | bh = bh->b_this_page; |
2817 | 2974 | ||
2818 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2975 | if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) |
2819 | return 0; | 2976 | return 0; |
2820 | return 1; | 2977 | return 1; |
2821 | } | 2978 | } |
@@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page, | |||
3085 | struct inode *inode = page->mapping->host; | 3242 | struct inode *inode = page->mapping->host; |
3086 | 3243 | ||
3087 | if (test_opt(inode->i_sb, NOBH)) | 3244 | if (test_opt(inode->i_sb, NOBH)) |
3088 | return nobh_writepage(page, | 3245 | return nobh_writepage(page, noalloc_get_block_write, wbc); |
3089 | ext4_normal_get_block_write, wbc); | ||
3090 | else | 3246 | else |
3091 | return block_write_full_page(page, | 3247 | return block_write_full_page(page, noalloc_get_block_write, |
3092 | ext4_normal_get_block_write, | 3248 | wbc); |
3093 | wbc); | ||
3094 | } | 3249 | } |
3095 | 3250 | ||
3096 | static int ext4_normal_writepage(struct page *page, | 3251 | static int ext4_normal_writepage(struct page *page, |
@@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
3142 | int err; | 3297 | int err; |
3143 | 3298 | ||
3144 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 3299 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
3145 | ext4_normal_get_block_write); | 3300 | noalloc_get_block_write); |
3146 | if (ret != 0) | 3301 | if (ret != 0) |
3147 | goto out_unlock; | 3302 | goto out_unlock; |
3148 | 3303 | ||
@@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page, | |||
3227 | * really know unless we go poke around in the buffer_heads. | 3382 | * really know unless we go poke around in the buffer_heads. |
3228 | * But block_write_full_page will do the right thing. | 3383 | * But block_write_full_page will do the right thing. |
3229 | */ | 3384 | */ |
3230 | return block_write_full_page(page, | 3385 | return block_write_full_page(page, noalloc_get_block_write, |
3231 | ext4_normal_get_block_write, | 3386 | wbc); |
3232 | wbc); | ||
3233 | } | 3387 | } |
3234 | no_write: | 3388 | no_write: |
3235 | redirty_page_for_writepage(wbc, page); | 3389 | redirty_page_for_writepage(wbc, page); |
@@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode) | |||
3973 | if (!ext4_can_truncate(inode)) | 4127 | if (!ext4_can_truncate(inode)) |
3974 | return; | 4128 | return; |
3975 | 4129 | ||
3976 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4130 | if (ei->i_disksize && inode->i_size == 0 && |
4131 | !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
3977 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 4132 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; |
3978 | 4133 | ||
3979 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4134 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
@@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
4715 | return ext4_force_commit(inode->i_sb); | 4870 | return ext4_force_commit(inode->i_sb); |
4716 | } | 4871 | } |
4717 | 4872 | ||
4718 | int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh) | ||
4719 | { | ||
4720 | int err = 0; | ||
4721 | |||
4722 | mark_buffer_dirty(bh); | ||
4723 | if (inode && inode_needs_sync(inode)) { | ||
4724 | sync_dirty_buffer(bh); | ||
4725 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | ||
4726 | ext4_error(inode->i_sb, __func__, | ||
4727 | "IO error syncing inode, " | ||
4728 | "inode=%lu, block=%llu", | ||
4729 | inode->i_ino, | ||
4730 | (unsigned long long)bh->b_blocknr); | ||
4731 | err = -EIO; | ||
4732 | } | ||
4733 | } | ||
4734 | return err; | ||
4735 | } | ||
4736 | |||
4737 | /* | 4873 | /* |
4738 | * ext4_setattr() | 4874 | * ext4_setattr() |
4739 | * | 4875 | * |
@@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4930 | */ | 5066 | */ |
4931 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5067 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
4932 | { | 5068 | { |
4933 | int groups, gdpblocks; | 5069 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5070 | int gdpblocks; | ||
4934 | int idxblocks; | 5071 | int idxblocks; |
4935 | int ret = 0; | 5072 | int ret = 0; |
4936 | 5073 | ||
@@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4957 | groups += nrblocks; | 5094 | groups += nrblocks; |
4958 | 5095 | ||
4959 | gdpblocks = groups; | 5096 | gdpblocks = groups; |
4960 | if (groups > EXT4_SB(inode->i_sb)->s_groups_count) | 5097 | if (groups > ngroups) |
4961 | groups = EXT4_SB(inode->i_sb)->s_groups_count; | 5098 | groups = ngroups; |
4962 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | 5099 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) |
4963 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | 5100 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; |
4964 | 5101 | ||
@@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
4998 | * Calculate the journal credits for a chunk of data modification. | 5135 | * Calculate the journal credits for a chunk of data modification. |
4999 | * | 5136 | * |
5000 | * This is called from DIO, fallocate or whoever calls | 5137 | * This is called from DIO, fallocate or whoever calls |
5001 | * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. | 5138 | * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. |
5002 | * | 5139 | * |
5003 | * journal buffers for data blocks are not included here, as DIO | 5140 | * journal buffers for data blocks are not included here, as DIO |
5004 | * and fallocate do no need to journal data buffers. | 5141 | * and fallocate do no need to journal data buffers. |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f871677a7984..ed8482e22c0e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr) | |||
372 | ext4_set_bit(bit, addr); | 372 | ext4_set_bit(bit, addr); |
373 | } | 373 | } |
374 | 374 | ||
375 | static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) | ||
376 | { | ||
377 | addr = mb_correct_addr_and_bit(&bit, addr); | ||
378 | ext4_set_bit_atomic(lock, bit, addr); | ||
379 | } | ||
380 | |||
381 | static inline void mb_clear_bit(int bit, void *addr) | 375 | static inline void mb_clear_bit(int bit, void *addr) |
382 | { | 376 | { |
383 | addr = mb_correct_addr_and_bit(&bit, addr); | 377 | addr = mb_correct_addr_and_bit(&bit, addr); |
384 | ext4_clear_bit(bit, addr); | 378 | ext4_clear_bit(bit, addr); |
385 | } | 379 | } |
386 | 380 | ||
387 | static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) | ||
388 | { | ||
389 | addr = mb_correct_addr_and_bit(&bit, addr); | ||
390 | ext4_clear_bit_atomic(lock, bit, addr); | ||
391 | } | ||
392 | |||
393 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) | 381 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) |
394 | { | 382 | { |
395 | int fix = 0, ret, tmpmax; | 383 | int fix = 0, ret, tmpmax; |
@@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
448 | 436 | ||
449 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) | 437 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) |
450 | return; | 438 | return; |
451 | BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); | 439 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); |
452 | for (i = 0; i < count; i++) { | 440 | for (i = 0; i < count; i++) { |
453 | if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { | 441 | if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { |
454 | ext4_fsblk_t blocknr; | 442 | ext4_fsblk_t blocknr; |
@@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count) | |||
472 | 460 | ||
473 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) | 461 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) |
474 | return; | 462 | return; |
475 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); | 463 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
476 | for (i = 0; i < count; i++) { | 464 | for (i = 0; i < count; i++) { |
477 | BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); | 465 | BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); |
478 | mb_set_bit(first + i, e4b->bd_info->bb_bitmap); | 466 | mb_set_bit(first + i, e4b->bd_info->bb_bitmap); |
@@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, | |||
739 | 727 | ||
740 | static int ext4_mb_init_cache(struct page *page, char *incore) | 728 | static int ext4_mb_init_cache(struct page *page, char *incore) |
741 | { | 729 | { |
730 | ext4_group_t ngroups; | ||
742 | int blocksize; | 731 | int blocksize; |
743 | int blocks_per_page; | 732 | int blocks_per_page; |
744 | int groups_per_page; | 733 | int groups_per_page; |
@@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
757 | 746 | ||
758 | inode = page->mapping->host; | 747 | inode = page->mapping->host; |
759 | sb = inode->i_sb; | 748 | sb = inode->i_sb; |
749 | ngroups = ext4_get_groups_count(sb); | ||
760 | blocksize = 1 << inode->i_blkbits; | 750 | blocksize = 1 << inode->i_blkbits; |
761 | blocks_per_page = PAGE_CACHE_SIZE / blocksize; | 751 | blocks_per_page = PAGE_CACHE_SIZE / blocksize; |
762 | 752 | ||
@@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
780 | for (i = 0; i < groups_per_page; i++) { | 770 | for (i = 0; i < groups_per_page; i++) { |
781 | struct ext4_group_desc *desc; | 771 | struct ext4_group_desc *desc; |
782 | 772 | ||
783 | if (first_group + i >= EXT4_SB(sb)->s_groups_count) | 773 | if (first_group + i >= ngroups) |
784 | break; | 774 | break; |
785 | 775 | ||
786 | err = -EIO; | 776 | err = -EIO; |
@@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
801 | unlock_buffer(bh[i]); | 791 | unlock_buffer(bh[i]); |
802 | continue; | 792 | continue; |
803 | } | 793 | } |
804 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 794 | ext4_lock_group(sb, first_group + i); |
805 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 795 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
806 | ext4_init_block_bitmap(sb, bh[i], | 796 | ext4_init_block_bitmap(sb, bh[i], |
807 | first_group + i, desc); | 797 | first_group + i, desc); |
808 | set_bitmap_uptodate(bh[i]); | 798 | set_bitmap_uptodate(bh[i]); |
809 | set_buffer_uptodate(bh[i]); | 799 | set_buffer_uptodate(bh[i]); |
810 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 800 | ext4_unlock_group(sb, first_group + i); |
811 | unlock_buffer(bh[i]); | 801 | unlock_buffer(bh[i]); |
812 | continue; | 802 | continue; |
813 | } | 803 | } |
814 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 804 | ext4_unlock_group(sb, first_group + i); |
815 | if (buffer_uptodate(bh[i])) { | 805 | if (buffer_uptodate(bh[i])) { |
816 | /* | 806 | /* |
817 | * if not uninit if bh is uptodate, | 807 | * if not uninit if bh is uptodate, |
@@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
852 | struct ext4_group_info *grinfo; | 842 | struct ext4_group_info *grinfo; |
853 | 843 | ||
854 | group = (first_block + i) >> 1; | 844 | group = (first_block + i) >> 1; |
855 | if (group >= EXT4_SB(sb)->s_groups_count) | 845 | if (group >= ngroups) |
856 | break; | 846 | break; |
857 | 847 | ||
858 | /* | 848 | /* |
@@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) | |||
1078 | return 0; | 1068 | return 0; |
1079 | } | 1069 | } |
1080 | 1070 | ||
1081 | static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) | 1071 | static void mb_clear_bits(void *bm, int cur, int len) |
1082 | { | 1072 | { |
1083 | __u32 *addr; | 1073 | __u32 *addr; |
1084 | 1074 | ||
@@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
1091 | cur += 32; | 1081 | cur += 32; |
1092 | continue; | 1082 | continue; |
1093 | } | 1083 | } |
1094 | if (lock) | 1084 | mb_clear_bit(cur, bm); |
1095 | mb_clear_bit_atomic(lock, cur, bm); | ||
1096 | else | ||
1097 | mb_clear_bit(cur, bm); | ||
1098 | cur++; | 1085 | cur++; |
1099 | } | 1086 | } |
1100 | } | 1087 | } |
1101 | 1088 | ||
1102 | static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | 1089 | static void mb_set_bits(void *bm, int cur, int len) |
1103 | { | 1090 | { |
1104 | __u32 *addr; | 1091 | __u32 *addr; |
1105 | 1092 | ||
@@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
1112 | cur += 32; | 1099 | cur += 32; |
1113 | continue; | 1100 | continue; |
1114 | } | 1101 | } |
1115 | if (lock) | 1102 | mb_set_bit(cur, bm); |
1116 | mb_set_bit_atomic(lock, cur, bm); | ||
1117 | else | ||
1118 | mb_set_bit(cur, bm); | ||
1119 | cur++; | 1103 | cur++; |
1120 | } | 1104 | } |
1121 | } | 1105 | } |
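With the atomic variants gone, correctness rests on every caller of mb_set_bits()/mb_clear_bits() holding the per-group spinlock, which is exactly what the assert_spin_locked() conversions elsewhere in this diff check. A sketch of the assumed locking pattern (ext4_lock_group()/ext4_unlock_group() paraphrased from ext4.h; the final function is a hypothetical caller shape):

	static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
	{
		spin_lock(ext4_group_lock_ptr(sb, group));
	}

	static inline void ext4_unlock_group(struct super_block *sb, ext4_group_t group)
	{
		spin_unlock(ext4_group_lock_ptr(sb, group));
	}

	static void mark_extent_used_sketch(struct super_block *sb, ext4_group_t group,
					    void *bitmap, int start, int len)
	{
		ext4_lock_group(sb, group);		/* serializes all bitmap updates */
		mb_set_bits(bitmap, start, len);	/* plain bit ops suffice here */
		ext4_unlock_group(sb, group);
	}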
@@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1131 | struct super_block *sb = e4b->bd_sb; | 1115 | struct super_block *sb = e4b->bd_sb; |
1132 | 1116 | ||
1133 | BUG_ON(first + count > (sb->s_blocksize << 3)); | 1117 | BUG_ON(first + count > (sb->s_blocksize << 3)); |
1134 | BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); | 1118 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); |
1135 | mb_check_buddy(e4b); | 1119 | mb_check_buddy(e4b); |
1136 | mb_free_blocks_double(inode, e4b, first, count); | 1120 | mb_free_blocks_double(inode, e4b, first, count); |
1137 | 1121 | ||
@@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
1212 | int ord; | 1196 | int ord; |
1213 | void *buddy; | 1197 | void *buddy; |
1214 | 1198 | ||
1215 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); | 1199 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
1216 | BUG_ON(ex == NULL); | 1200 | BUG_ON(ex == NULL); |
1217 | 1201 | ||
1218 | buddy = mb_find_buddy(e4b, order, &max); | 1202 | buddy = mb_find_buddy(e4b, order, &max); |
@@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1276 | 1260 | ||
1277 | BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); | 1261 | BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); |
1278 | BUG_ON(e4b->bd_group != ex->fe_group); | 1262 | BUG_ON(e4b->bd_group != ex->fe_group); |
1279 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); | 1263 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
1280 | mb_check_buddy(e4b); | 1264 | mb_check_buddy(e4b); |
1281 | mb_mark_used_double(e4b, start, len); | 1265 | mb_mark_used_double(e4b, start, len); |
1282 | 1266 | ||
@@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1330 | e4b->bd_info->bb_counters[ord]++; | 1314 | e4b->bd_info->bb_counters[ord]++; |
1331 | } | 1315 | } |
1332 | 1316 | ||
1333 | mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), | 1317 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); |
1334 | EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | ||
1335 | mb_check_buddy(e4b); | 1318 | mb_check_buddy(e4b); |
1336 | 1319 | ||
1337 | return ret; | 1320 | return ret; |
@@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1726 | unsigned free, fragments; | 1709 | unsigned free, fragments; |
1727 | unsigned i, bits; | 1710 | unsigned i, bits; |
1728 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); | 1711 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); |
1729 | struct ext4_group_desc *desc; | ||
1730 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | 1712 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
1731 | 1713 | ||
1732 | BUG_ON(cr < 0 || cr >= 4); | 1714 | BUG_ON(cr < 0 || cr >= 4); |
@@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1742 | switch (cr) { | 1724 | switch (cr) { |
1743 | case 0: | 1725 | case 0: |
1744 | BUG_ON(ac->ac_2order == 0); | 1726 | BUG_ON(ac->ac_2order == 0); |
1745 | /* If this group is uninitialized, skip it initially */ | ||
1746 | desc = ext4_get_group_desc(ac->ac_sb, group, NULL); | ||
1747 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
1748 | return 0; | ||
1749 | 1727 | ||
1750 | /* Avoid using the first bg of a flexgroup for data files */ | 1728 | /* Avoid using the first bg of a flexgroup for data files */ |
1751 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && | 1729 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && |
@@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | |||
1788 | int block, pnum; | 1766 | int block, pnum; |
1789 | int blocks_per_page; | 1767 | int blocks_per_page; |
1790 | int groups_per_page; | 1768 | int groups_per_page; |
1769 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1791 | ext4_group_t first_group; | 1770 | ext4_group_t first_group; |
1792 | struct ext4_group_info *grp; | 1771 | struct ext4_group_info *grp; |
1793 | 1772 | ||
@@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | |||
1807 | /* read all groups the page covers into the cache */ | 1786 | /* read all groups the page covers into the cache */ |
1808 | for (i = 0; i < groups_per_page; i++) { | 1787 | for (i = 0; i < groups_per_page; i++) { |
1809 | 1788 | ||
1810 | if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) | 1789 | if ((first_group + i) >= ngroups) |
1811 | break; | 1790 | break; |
1812 | grp = ext4_get_group_info(sb, first_group + i); | 1791 | grp = ext4_get_group_info(sb, first_group + i); |
1813 | /* take all groups write allocation | 1792 | /* take all groups write allocation |
@@ -1945,8 +1924,7 @@ err: | |||
1945 | static noinline_for_stack int | 1924 | static noinline_for_stack int |
1946 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1925 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1947 | { | 1926 | { |
1948 | ext4_group_t group; | 1927 | ext4_group_t ngroups, group, i; |
1949 | ext4_group_t i; | ||
1950 | int cr; | 1928 | int cr; |
1951 | int err = 0; | 1929 | int err = 0; |
1952 | int bsbits; | 1930 | int bsbits; |
@@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1957 | 1935 | ||
1958 | sb = ac->ac_sb; | 1936 | sb = ac->ac_sb; |
1959 | sbi = EXT4_SB(sb); | 1937 | sbi = EXT4_SB(sb); |
1938 | ngroups = ext4_get_groups_count(sb); | ||
1960 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 1939 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
1961 | 1940 | ||
1962 | /* first, try the goal */ | 1941 | /* first, try the goal */ |
@@ -2017,11 +1996,11 @@ repeat: | |||
2017 | */ | 1996 | */ |
2018 | group = ac->ac_g_ex.fe_group; | 1997 | group = ac->ac_g_ex.fe_group; |
2019 | 1998 | ||
2020 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { | 1999 | for (i = 0; i < ngroups; group++, i++) { |
2021 | struct ext4_group_info *grp; | 2000 | struct ext4_group_info *grp; |
2022 | struct ext4_group_desc *desc; | 2001 | struct ext4_group_desc *desc; |
2023 | 2002 | ||
2024 | if (group == EXT4_SB(sb)->s_groups_count) | 2003 | if (group == ngroups) |
2025 | group = 0; | 2004 | group = 0; |
2026 | 2005 | ||
2027 | /* quick check to skip empty groups */ | 2006 | /* quick check to skip empty groups */ |
@@ -2064,9 +2043,7 @@ repeat: | |||
2064 | 2043 | ||
2065 | ac->ac_groups_scanned++; | 2044 | ac->ac_groups_scanned++; |
2066 | desc = ext4_get_group_desc(sb, group, NULL); | 2045 | desc = ext4_get_group_desc(sb, group, NULL); |
2067 | if (cr == 0 || (desc->bg_flags & | 2046 | if (cr == 0) |
2068 | cpu_to_le16(EXT4_BG_BLOCK_UNINIT) && | ||
2069 | ac->ac_2order != 0)) | ||
2070 | ext4_mb_simple_scan_group(ac, &e4b); | 2047 | ext4_mb_simple_scan_group(ac, &e4b); |
2071 | else if (cr == 1 && | 2048 | else if (cr == 1 && |
2072 | ac->ac_g_ex.fe_len == sbi->s_stripe) | 2049 | ac->ac_g_ex.fe_len == sbi->s_stripe) |
@@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = { | |||
2315 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | 2292 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) |
2316 | { | 2293 | { |
2317 | struct super_block *sb = seq->private; | 2294 | struct super_block *sb = seq->private; |
2318 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2319 | ext4_group_t group; | 2295 | ext4_group_t group; |
2320 | 2296 | ||
2321 | if (*pos < 0 || *pos >= sbi->s_groups_count) | 2297 | if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) |
2322 | return NULL; | 2298 | return NULL; |
2323 | |||
2324 | group = *pos + 1; | 2299 | group = *pos + 1; |
2325 | return (void *) ((unsigned long) group); | 2300 | return (void *) ((unsigned long) group); |
2326 | } | 2301 | } |
@@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | |||
2328 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) | 2303 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) |
2329 | { | 2304 | { |
2330 | struct super_block *sb = seq->private; | 2305 | struct super_block *sb = seq->private; |
2331 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2332 | ext4_group_t group; | 2306 | ext4_group_t group; |
2333 | 2307 | ||
2334 | ++*pos; | 2308 | ++*pos; |
2335 | if (*pos < 0 || *pos >= sbi->s_groups_count) | 2309 | if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) |
2336 | return NULL; | 2310 | return NULL; |
2337 | group = *pos + 1; | 2311 | group = *pos + 1; |
2338 | return (void *) ((unsigned long) group); | 2312 | return (void *) ((unsigned long) group); |
@@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb) | |||
2420 | 2394 | ||
2421 | if (sbi->s_proc != NULL) { | 2395 | if (sbi->s_proc != NULL) { |
2422 | remove_proc_entry("mb_groups", sbi->s_proc); | 2396 | remove_proc_entry("mb_groups", sbi->s_proc); |
2423 | remove_proc_entry("mb_history", sbi->s_proc); | 2397 | if (sbi->s_mb_history_max) |
2398 | remove_proc_entry("mb_history", sbi->s_proc); | ||
2424 | } | 2399 | } |
2425 | kfree(sbi->s_mb_history); | 2400 | kfree(sbi->s_mb_history); |
2426 | } | 2401 | } |
@@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
2431 | int i; | 2406 | int i; |
2432 | 2407 | ||
2433 | if (sbi->s_proc != NULL) { | 2408 | if (sbi->s_proc != NULL) { |
2434 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, | 2409 | if (sbi->s_mb_history_max) |
2435 | &ext4_mb_seq_history_fops, sb); | 2410 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, |
2411 | &ext4_mb_seq_history_fops, sb); | ||
2436 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | 2412 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2437 | &ext4_mb_seq_groups_fops, sb); | 2413 | &ext4_mb_seq_groups_fops, sb); |
2438 | } | 2414 | } |
2439 | 2415 | ||
2440 | sbi->s_mb_history_max = 1000; | ||
2441 | sbi->s_mb_history_cur = 0; | 2416 | sbi->s_mb_history_cur = 0; |
2442 | spin_lock_init(&sbi->s_mb_history_lock); | 2417 | spin_lock_init(&sbi->s_mb_history_lock); |
2443 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); | 2418 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); |
2444 | sbi->s_mb_history = kzalloc(i, GFP_KERNEL); | 2419 | sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL; |
2445 | /* if we can't allocate history, then we simply won't use it */ | 2420 | /* if we can't allocate history, then we simply won't use it */ |
2446 | } | 2421 | } |
2447 | 2422 | ||
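With the hardcoded `sbi->s_mb_history_max = 1000;` removed from ext4_mb_history_init(), the limit is presumably seeded earlier in the mount path from the new default_mb_history_length module parameter and the mb_history_length= mount option added in super.c below. A sketch of the assumed wiring (the parse_options() details are an assumption):

	/* sketch: in ext4_fill_super(), before parse_options() runs */
	sbi->s_mb_history_max = default_mb_history_length;

	/* sketch: in the parse_options() switch */
	case Opt_mb_history_length:
		if (match_int(&args[0], &option) || option < 0)
			return 0;
		sbi->s_mb_history_max = option;
		break;

A length of 0 now disables mb_history cleanly: the kzalloc() is skipped, ext4_mb_store_history() bails out on the NULL buffer, and the mb_history proc file is neither created nor removed.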
@@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) | |||
2451 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | 2426 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
2452 | struct ext4_mb_history h; | 2427 | struct ext4_mb_history h; |
2453 | 2428 | ||
2454 | if (unlikely(sbi->s_mb_history == NULL)) | 2429 | if (sbi->s_mb_history == NULL) |
2455 | return; | 2430 | return; |
2456 | 2431 | ||
2457 | if (!(ac->ac_op & sbi->s_mb_history_filter)) | 2432 | if (!(ac->ac_op & sbi->s_mb_history_filter)) |
@@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | |||
2587 | 2562 | ||
2588 | static int ext4_mb_init_backend(struct super_block *sb) | 2563 | static int ext4_mb_init_backend(struct super_block *sb) |
2589 | { | 2564 | { |
2565 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
2590 | ext4_group_t i; | 2566 | ext4_group_t i; |
2591 | int metalen; | 2567 | int metalen; |
2592 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2568 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2598 | struct ext4_group_desc *desc; | 2574 | struct ext4_group_desc *desc; |
2599 | 2575 | ||
2600 | /* This is the number of blocks used by GDT */ | 2576 | /* This is the number of blocks used by GDT */ |
2601 | num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - | 2577 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
2602 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); | 2578 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); |
2603 | 2579 | ||
2604 | /* | 2580 | /* |
@@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2644 | for (i = 0; i < num_meta_group_infos; i++) { | 2620 | for (i = 0; i < num_meta_group_infos; i++) { |
2645 | if ((i + 1) == num_meta_group_infos) | 2621 | if ((i + 1) == num_meta_group_infos) |
2646 | metalen = sizeof(*meta_group_info) * | 2622 | metalen = sizeof(*meta_group_info) * |
2647 | (sbi->s_groups_count - | 2623 | (ngroups - |
2648 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); | 2624 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); |
2649 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | 2625 | meta_group_info = kmalloc(metalen, GFP_KERNEL); |
2650 | if (meta_group_info == NULL) { | 2626 | if (meta_group_info == NULL) { |
@@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2655 | sbi->s_group_info[i] = meta_group_info; | 2631 | sbi->s_group_info[i] = meta_group_info; |
2656 | } | 2632 | } |
2657 | 2633 | ||
2658 | for (i = 0; i < sbi->s_groups_count; i++) { | 2634 | for (i = 0; i < ngroups; i++) { |
2659 | desc = ext4_get_group_desc(sb, i, NULL); | 2635 | desc = ext4_get_group_desc(sb, i, NULL); |
2660 | if (desc == NULL) { | 2636 | if (desc == NULL) { |
2661 | printk(KERN_ERR | 2637 | printk(KERN_ERR |
@@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2761 | return 0; | 2737 | return 0; |
2762 | } | 2738 | } |
2763 | 2739 | ||
2764 | /* needs to be called with ext4 group lock (ext4_lock_group) */ | 2740 | /* needs to be called with the ext4 group lock held */ |
2765 | static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | 2741 | static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) |
2766 | { | 2742 | { |
2767 | struct ext4_prealloc_space *pa; | 2743 | struct ext4_prealloc_space *pa; |
@@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2781 | 2757 | ||
2782 | int ext4_mb_release(struct super_block *sb) | 2758 | int ext4_mb_release(struct super_block *sb) |
2783 | { | 2759 | { |
2760 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
2784 | ext4_group_t i; | 2761 | ext4_group_t i; |
2785 | int num_meta_group_infos; | 2762 | int num_meta_group_infos; |
2786 | struct ext4_group_info *grinfo; | 2763 | struct ext4_group_info *grinfo; |
2787 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2764 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2788 | 2765 | ||
2789 | if (sbi->s_group_info) { | 2766 | if (sbi->s_group_info) { |
2790 | for (i = 0; i < sbi->s_groups_count; i++) { | 2767 | for (i = 0; i < ngroups; i++) { |
2791 | grinfo = ext4_get_group_info(sb, i); | 2768 | grinfo = ext4_get_group_info(sb, i); |
2792 | #ifdef DOUBLE_CHECK | 2769 | #ifdef DOUBLE_CHECK |
2793 | kfree(grinfo->bb_bitmap); | 2770 | kfree(grinfo->bb_bitmap); |
@@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2797 | ext4_unlock_group(sb, i); | 2774 | ext4_unlock_group(sb, i); |
2798 | kfree(grinfo); | 2775 | kfree(grinfo); |
2799 | } | 2776 | } |
2800 | num_meta_group_infos = (sbi->s_groups_count + | 2777 | num_meta_group_infos = (ngroups + |
2801 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2778 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
2802 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2779 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2803 | for (i = 0; i < num_meta_group_infos; i++) | 2780 | for (i = 0; i < num_meta_group_infos; i++) |
@@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2984 | + le32_to_cpu(es->s_first_data_block); | 2961 | + le32_to_cpu(es->s_first_data_block); |
2985 | 2962 | ||
2986 | len = ac->ac_b_ex.fe_len; | 2963 | len = ac->ac_b_ex.fe_len; |
2987 | if (in_range(ext4_block_bitmap(sb, gdp), block, len) || | 2964 | if (!ext4_data_block_valid(sbi, block, len)) { |
2988 | in_range(ext4_inode_bitmap(sb, gdp), block, len) || | ||
2989 | in_range(block, ext4_inode_table(sb, gdp), | ||
2990 | EXT4_SB(sb)->s_itb_per_group) || | ||
2991 | in_range(block + len - 1, ext4_inode_table(sb, gdp), | ||
2992 | EXT4_SB(sb)->s_itb_per_group)) { | ||
2993 | ext4_error(sb, __func__, | 2965 | ext4_error(sb, __func__, |
2994 | "Allocating block %llu in system zone of %d group\n", | 2966 | "Allocating blocks %llu-%llu which overlap " |
2995 | block, ac->ac_b_ex.fe_group); | 2967 | "fs metadata\n", block, block+len); |
2996 | /* The file system is mounted not to panic on error, | 2968 | /* The file system is mounted not to panic on error, |
2997 | * so fix the bitmap and repeat the block allocation. | 2969 | * so fix the bitmap and repeat the block allocation. |
2998 | * We leak some of the blocks here. | 2970 | * We leak some of the blocks here. |
2999 | */ | 2971 | */ |
3000 | mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), | 2972 | ext4_lock_group(sb, ac->ac_b_ex.fe_group); |
3001 | bitmap_bh->b_data, ac->ac_b_ex.fe_start, | 2973 | mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, |
3002 | ac->ac_b_ex.fe_len); | 2974 | ac->ac_b_ex.fe_len); |
2975 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | ||
3003 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 2976 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
3004 | if (!err) | 2977 | if (!err) |
3005 | err = -EAGAIN; | 2978 | err = -EAGAIN; |
3006 | goto out_err; | 2979 | goto out_err; |
3007 | } | 2980 | } |
2981 | |||
2982 | ext4_lock_group(sb, ac->ac_b_ex.fe_group); | ||
3008 | #ifdef AGGRESSIVE_CHECK | 2983 | #ifdef AGGRESSIVE_CHECK |
3009 | { | 2984 | { |
3010 | int i; | 2985 | int i; |
@@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
3014 | } | 2989 | } |
3015 | } | 2990 | } |
3016 | #endif | 2991 | #endif |
3017 | spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2992 | mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); |
3018 | mb_set_bits(NULL, bitmap_bh->b_data, | ||
3019 | ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); | ||
3020 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 2993 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
3021 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 2994 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
3022 | ext4_free_blks_set(sb, gdp, | 2995 | ext4_free_blks_set(sb, gdp, |
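ext4_data_block_valid() folds the four open-coded in_range() tests into one helper. A minimal sketch of such a check, assuming the block_validity machinery that the new block_validity/noblock_validity mount options below refer to (struct ext4_system_zone and sbi->system_blks are assumptions, inferred from the ext4_release_system_zone() call added to ext4_put_super() below):

	int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
				  unsigned int count)
	{
		struct ext4_system_zone *entry;
		struct rb_node *n = sbi->system_blks.rb_node;

		/* range must lie inside the data area of the filesystem */
		if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
		    (start_blk + count < start_blk) ||
		    (start_blk + count > ext4_blocks_count(sbi->s_es)))
			return 0;
		/* ...and must not overlap any cached metadata extent */
		while (n) {
			entry = rb_entry(n, struct ext4_system_zone, node);
			if (start_blk + count - 1 < entry->start_blk)
				n = n->rb_left;
			else if (start_blk >= (entry->start_blk + entry->count))
				n = n->rb_right;
			else
				return 0;
		}
		return 1;
	}

Unlike the old test, this covers the bitmaps and inode tables of every group, not just the group being allocated from, at the cost of one tree lookup per allocation.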
@@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
3026 | len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; | 2999 | len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; |
3027 | ext4_free_blks_set(sb, gdp, len); | 3000 | ext4_free_blks_set(sb, gdp, len); |
3028 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 3001 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
3029 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 3002 | |
3003 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | ||
3030 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | 3004 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); |
3031 | /* | 3005 | /* |
3032 | * Now reduce the dirty block count also. Should not go negative | 3006 | * Now reduce the dirty block count also. Should not go negative |
@@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3459 | * the function goes through all blocks freed in the group | 3433 | * the function goes through all blocks freed in the group |
3460 | * but not yet committed and marks them used in the in-core bitmap. | 3434 | * but not yet committed and marks them used in the in-core bitmap. |
3461 | * buddy must be generated from this bitmap | 3435 | * buddy must be generated from this bitmap |
3462 | * Needs to be called with the ext4 group lock (ext4_lock_group) | 3436 | * Needs to be called with the ext4 group lock held |
3463 | */ | 3437 | */ |
3464 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 3438 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
3465 | ext4_group_t group) | 3439 | ext4_group_t group) |
@@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3473 | 3447 | ||
3474 | while (n) { | 3448 | while (n) { |
3475 | entry = rb_entry(n, struct ext4_free_data, node); | 3449 | entry = rb_entry(n, struct ext4_free_data, node); |
3476 | mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), | 3450 | mb_set_bits(bitmap, entry->start_blk, entry->count); |
3477 | bitmap, entry->start_blk, | ||
3478 | entry->count); | ||
3479 | n = rb_next(n); | 3451 | n = rb_next(n); |
3480 | } | 3452 | } |
3481 | return; | 3453 | return; |
@@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3484 | /* | 3456 | /* |
3485 | * the function goes through all preallocations in this group and marks them | 3457 | * the function goes through all preallocations in this group and marks them |
3486 | * used in the in-core bitmap. buddy must be generated from this bitmap | 3458 | * used in the in-core bitmap. buddy must be generated from this bitmap |
3487 | * Needs to be called with the ext4 group lock (ext4_lock_group) | 3459 | * Needs to be called with the ext4 group lock held |
3488 | */ | 3460 | */ |
3489 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 3461 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
3490 | ext4_group_t group) | 3462 | ext4_group_t group) |
@@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3516 | if (unlikely(len == 0)) | 3488 | if (unlikely(len == 0)) |
3517 | continue; | 3489 | continue; |
3518 | BUG_ON(groupnr != group); | 3490 | BUG_ON(groupnr != group); |
3519 | mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), | 3491 | mb_set_bits(bitmap, start, len); |
3520 | bitmap, start, len); | ||
3521 | preallocated += len; | 3492 | preallocated += len; |
3522 | count++; | 3493 | count++; |
3523 | } | 3494 | } |
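Both call sites above already run under ext4_lock_group(), so mb_set_bits() drops its spinlock argument; mb_clear_bits() gets the same treatment in the block-freeing path further down. The calling convention after this change, as a sketch:

	/* the caller serializes; the bitmap helpers just flip bits */
	ext4_lock_group(sb, group);
	mb_set_bits(bitmap, start, len);	/* was: mb_set_bits(lock, bitmap, start, len) */
	ext4_unlock_group(sb, group);

This makes the group lock the single lock covering both the in-core bitmap and the group descriptor counters, which is why the sb_bgl_lock()/spin_lock() pairs disappear from ext4_mb_mark_diskspace_used() and the freeing hunks nearby.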
@@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, | |||
4121 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 4092 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
4122 | { | 4093 | { |
4123 | struct super_block *sb = ac->ac_sb; | 4094 | struct super_block *sb = ac->ac_sb; |
4124 | ext4_group_t i; | 4095 | ext4_group_t ngroups, i; |
4125 | 4096 | ||
4126 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 4097 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
4127 | " Allocation context details:\n"); | 4098 | " Allocation context details:\n"); |
@@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
4145 | printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, | 4116 | printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, |
4146 | ac->ac_found); | 4117 | ac->ac_found); |
4147 | printk(KERN_ERR "EXT4-fs: groups: \n"); | 4118 | printk(KERN_ERR "EXT4-fs: groups: \n"); |
4148 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 4119 | ngroups = ext4_get_groups_count(sb); |
4120 | for (i = 0; i < ngroups; i++) { | ||
4149 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); | 4121 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
4150 | struct ext4_prealloc_space *pa; | 4122 | struct ext4_prealloc_space *pa; |
4151 | ext4_grpblk_t start; | 4123 | ext4_grpblk_t start; |
@@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4469 | 4441 | ||
4470 | static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | 4442 | static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) |
4471 | { | 4443 | { |
4472 | ext4_group_t i; | 4444 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
4473 | int ret; | 4445 | int ret; |
4474 | int freed = 0; | 4446 | int freed = 0; |
4475 | 4447 | ||
4476 | trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", | 4448 | trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", |
4477 | sb->s_id, needed); | 4449 | sb->s_id, needed); |
4478 | for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { | 4450 | for (i = 0; i < ngroups && needed > 0; i++) { |
4479 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); | 4451 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); |
4480 | freed += ret; | 4452 | freed += ret; |
4481 | needed -= ret; | 4453 | needed -= ret; |
@@ -4859,29 +4831,25 @@ do_more: | |||
4859 | new_entry->group = block_group; | 4831 | new_entry->group = block_group; |
4860 | new_entry->count = count; | 4832 | new_entry->count = count; |
4861 | new_entry->t_tid = handle->h_transaction->t_tid; | 4833 | new_entry->t_tid = handle->h_transaction->t_tid; |
4834 | |||
4862 | ext4_lock_group(sb, block_group); | 4835 | ext4_lock_group(sb, block_group); |
4863 | mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | 4836 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4864 | bit, count); | ||
4865 | ext4_mb_free_metadata(handle, &e4b, new_entry); | 4837 | ext4_mb_free_metadata(handle, &e4b, new_entry); |
4866 | ext4_unlock_group(sb, block_group); | ||
4867 | } else { | 4838 | } else { |
4868 | ext4_lock_group(sb, block_group); | ||
4869 | /* need to update group_info->bb_free and bitmap | 4839 | /* need to update group_info->bb_free and bitmap |
4870 | * with group lock held. generate_buddy looks at | 4840 | * with group lock held. generate_buddy looks at |
4871 | * them with the group lock held | 4841 | * them with the group lock held |
4872 | */ | 4842 | */ |
4873 | mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | 4843 | ext4_lock_group(sb, block_group); |
4874 | bit, count); | 4844 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4875 | mb_free_blocks(inode, &e4b, bit, count); | 4845 | mb_free_blocks(inode, &e4b, bit, count); |
4876 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4846 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
4877 | ext4_unlock_group(sb, block_group); | ||
4878 | } | 4847 | } |
4879 | 4848 | ||
4880 | spin_lock(sb_bgl_lock(sbi, block_group)); | ||
4881 | ret = ext4_free_blks_count(sb, gdp) + count; | 4849 | ret = ext4_free_blks_count(sb, gdp) + count; |
4882 | ext4_free_blks_set(sb, gdp, ret); | 4850 | ext4_free_blks_set(sb, gdp, ret); |
4883 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | 4851 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); |
4884 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 4852 | ext4_unlock_group(sb, block_group); |
4885 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 4853 | percpu_counter_add(&sbi->s_freeblocks_counter, count); |
4886 | 4854 | ||
4887 | if (sbi->s_log_groups_per_flex) { | 4855 | if (sbi->s_log_groups_per_flex) { |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index dd9e6cd5f6cf..75e34f69215b 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include "ext4_jbd2.h" | 24 | #include "ext4_jbd2.h" |
25 | #include "ext4.h" | 25 | #include "ext4.h" |
26 | #include "group.h" | ||
27 | 26 | ||
28 | /* | 27 | /* |
29 | * with AGGRESSIVE_CHECK allocator runs consistency checks over | 28 | * with AGGRESSIVE_CHECK allocator runs consistency checks over |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 22098e1cd085..07eb6649e4fa 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include "ext4.h" | 37 | #include "ext4.h" |
38 | #include "ext4_jbd2.h" | 38 | #include "ext4_jbd2.h" |
39 | 39 | ||
40 | #include "namei.h" | ||
41 | #include "xattr.h" | 40 | #include "xattr.h" |
42 | #include "acl.h" | 41 | #include "acl.h" |
43 | 42 | ||
@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, | |||
750 | ext4fs_dirhash(de->name, de->name_len, &h); | 749 | ext4fs_dirhash(de->name, de->name_len, &h); |
751 | map_tail--; | 750 | map_tail--; |
752 | map_tail->hash = h.hash; | 751 | map_tail->hash = h.hash; |
753 | map_tail->offs = (u16) ((char *) de - base); | 752 | map_tail->offs = ((char *) de - base) >> 2; |
754 | map_tail->size = le16_to_cpu(de->rec_len); | 753 | map_tail->size = le16_to_cpu(de->rec_len); |
755 | count++; | 754 | count++; |
756 | cond_resched(); | 755 | cond_resched(); |
@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, | |||
1148 | unsigned rec_len = 0; | 1147 | unsigned rec_len = 0; |
1149 | 1148 | ||
1150 | while (count--) { | 1149 | while (count--) { |
1151 | struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); | 1150 | struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) |
1151 | (from + (map->offs << 2)); | ||
1152 | rec_len = EXT4_DIR_REC_LEN(de->name_len); | 1152 | rec_len = EXT4_DIR_REC_LEN(de->name_len); |
1153 | memcpy (to, de, rec_len); | 1153 | memcpy (to, de, rec_len); |
1154 | ((struct ext4_dir_entry_2 *) to)->rec_len = | 1154 | ((struct ext4_dir_entry_2 *) to)->rec_len = |
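dx_map_entry.offs is a 16-bit field (the old code cast the byte offset to u16). Directory entries are 4-byte aligned, so the low two bits carry no information; storing the offset shifted right by two extends the field's reach from 64KB to 256KB, presumably to leave headroom for large directory blocks. The packing, with the struct layout assumed:

	struct dx_map_entry {
		u32 hash;
		u16 offs;	/* dirent offset within the block, >> 2 */
		u16 size;
	};

	map_tail->offs = ((char *) de - base) >> 2;			/* pack */
	de = (struct ext4_dir_entry_2 *) (from + (map->offs << 2));	/* unpack */

Pack and unpack must stay in sync, which is why dx_make_map() and dx_move_dirents() change together in this hunk pair.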
@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
1997 | if (!ext4_handle_valid(handle)) | 1997 | if (!ext4_handle_valid(handle)) |
1998 | return 0; | 1998 | return 0; |
1999 | 1999 | ||
2000 | lock_super(sb); | 2000 | mutex_lock(&EXT4_SB(sb)->s_orphan_lock); |
2001 | if (!list_empty(&EXT4_I(inode)->i_orphan)) | 2001 | if (!list_empty(&EXT4_I(inode)->i_orphan)) |
2002 | goto out_unlock; | 2002 | goto out_unlock; |
2003 | 2003 | ||
@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2006 | 2006 | ||
2007 | /* @@@ FIXME: Observation from aviro: | 2007 | /* @@@ FIXME: Observation from aviro: |
2008 | * I think I can trigger J_ASSERT in ext4_orphan_add(). We block | 2008 | * I think I can trigger J_ASSERT in ext4_orphan_add(). We block |
2009 | * here (on lock_super()), so race with ext4_link() which might bump | 2009 | * here (on s_orphan_lock), so race with ext4_link() which might bump |
2010 | * ->i_nlink. For, say it, character device. Not a regular file, | 2010 | * ->i_nlink. For, say it, character device. Not a regular file, |
2011 | * not a directory, not a symlink and ->i_nlink > 0. | 2011 | * not a directory, not a symlink and ->i_nlink > 0. |
2012 | * | ||
2013 | * tytso, 4/25/2009: I'm not sure how that could happen; | ||
2014 | * shouldn't the fs core protect us from this sort of | ||
2015 | * unlink()/link() races? | ||
2012 | */ | 2016 | */ |
2013 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 2017 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
2014 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); | 2018 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); |
@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2045 | jbd_debug(4, "orphan inode %lu will point to %d\n", | 2049 | jbd_debug(4, "orphan inode %lu will point to %d\n", |
2046 | inode->i_ino, NEXT_ORPHAN(inode)); | 2050 | inode->i_ino, NEXT_ORPHAN(inode)); |
2047 | out_unlock: | 2051 | out_unlock: |
2048 | unlock_super(sb); | 2052 | mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); |
2049 | ext4_std_error(inode->i_sb, err); | 2053 | ext4_std_error(inode->i_sb, err); |
2050 | return err; | 2054 | return err; |
2051 | } | 2055 | } |
@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2066 | if (!ext4_handle_valid(handle)) | 2070 | if (!ext4_handle_valid(handle)) |
2067 | return 0; | 2071 | return 0; |
2068 | 2072 | ||
2069 | lock_super(inode->i_sb); | 2073 | mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); |
2070 | if (list_empty(&ei->i_orphan)) { | 2074 | if (list_empty(&ei->i_orphan)) |
2071 | unlock_super(inode->i_sb); | 2075 | goto out; |
2072 | return 0; | ||
2073 | } | ||
2074 | 2076 | ||
2075 | ino_next = NEXT_ORPHAN(inode); | 2077 | ino_next = NEXT_ORPHAN(inode); |
2076 | prev = ei->i_orphan.prev; | 2078 | prev = ei->i_orphan.prev; |
@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2120 | out_err: | 2122 | out_err: |
2121 | ext4_std_error(inode->i_sb, err); | 2123 | ext4_std_error(inode->i_sb, err); |
2122 | out: | 2124 | out: |
2123 | unlock_super(inode->i_sb); | 2125 | mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock); |
2124 | return err; | 2126 | return err; |
2125 | 2127 | ||
2126 | out_brelse: | 2128 | out_brelse: |
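ext4_orphan_add() and ext4_orphan_del() now serialize on a dedicated per-filesystem mutex rather than lock_super(), so on-disk orphan list maintenance stops contending with every other lock_super() user. The s_orphan_lock and s_resize_lock fields are assumed to be added to struct ext4_sb_info and initialized once at mount:

	/* sketch: in ext4_fill_super(), before the orphan list is processed */
	mutex_init(&sbi->s_orphan_lock);
	mutex_init(&sbi->s_resize_lock);

The early return in ext4_orphan_del() also folds into the common out: label, so the mutex is released on exactly one path.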
@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2533 | .removexattr = generic_removexattr, | 2535 | .removexattr = generic_removexattr, |
2534 | #endif | 2536 | #endif |
2535 | .permission = ext4_permission, | 2537 | .permission = ext4_permission, |
2538 | .fiemap = ext4_fiemap, | ||
2536 | }; | 2539 | }; |
2537 | 2540 | ||
2538 | const struct inode_operations ext4_special_inode_operations = { | 2541 | const struct inode_operations ext4_special_inode_operations = { |
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h deleted file mode 100644 index 5e4dfff36a00..000000000000 --- a/fs/ext4/namei.h +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | /* linux/fs/ext4/namei.h | ||
2 | * | ||
3 | * Copyright (C) 2005 Simtec Electronics | ||
4 | * Ben Dooks <ben@simtec.co.uk> | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | extern struct dentry *ext4_get_parent(struct dentry *child); | ||
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 546c7dd869e1..27eb289eea37 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | 16 | ||
17 | #include "ext4_jbd2.h" | 17 | #include "ext4_jbd2.h" |
18 | #include "group.h" | ||
19 | 18 | ||
20 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) | 19 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) |
21 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) | 20 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) |
@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
193 | if (IS_ERR(handle)) | 192 | if (IS_ERR(handle)) |
194 | return PTR_ERR(handle); | 193 | return PTR_ERR(handle); |
195 | 194 | ||
196 | lock_super(sb); | 195 | mutex_lock(&sbi->s_resize_lock); |
197 | if (input->group != sbi->s_groups_count) { | 196 | if (input->group != sbi->s_groups_count) { |
198 | err = -EBUSY; | 197 | err = -EBUSY; |
199 | goto exit_journal; | 198 | goto exit_journal; |
@@ -302,7 +301,7 @@ exit_bh: | |||
302 | brelse(bh); | 301 | brelse(bh); |
303 | 302 | ||
304 | exit_journal: | 303 | exit_journal: |
305 | unlock_super(sb); | 304 | mutex_unlock(&sbi->s_resize_lock); |
306 | if ((err2 = ext4_journal_stop(handle)) && !err) | 305 | if ((err2 = ext4_journal_stop(handle)) && !err) |
307 | err = err2; | 306 | err = err2; |
308 | 307 | ||
@@ -643,11 +642,12 @@ exit_free: | |||
643 | * important part is that the new block and inode counts are in the backup | 642 | * important part is that the new block and inode counts are in the backup |
644 | * superblocks, and the location of the new group metadata in the GDT backups. | 643 | * superblocks, and the location of the new group metadata in the GDT backups. |
645 | * | 644 | * |
646 | * We do not need lock_super() for this, because these blocks are not | 645 | * We do not need to take the s_resize_lock for this, because these |
647 | * otherwise touched by the filesystem code when it is mounted. We don't | 646 | * blocks are not otherwise touched by the filesystem code when it is |
648 | * need to worry about last changing from sbi->s_groups_count, because the | 647 | * mounted. We don't need to worry about last changing from |
649 | * worst that can happen is that we do not copy the full number of backups | 648 | * sbi->s_groups_count, because the worst that can happen is that we |
650 | * at this time. The resize which changed s_groups_count will back up again. | 649 | * do not copy the full number of backups at this time. The resize |
650 | * which changed s_groups_count will back up again. | ||
651 | */ | 651 | */ |
652 | static void update_backups(struct super_block *sb, | 652 | static void update_backups(struct super_block *sb, |
653 | int blk_off, char *data, int size) | 653 | int blk_off, char *data, int size) |
@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
809 | goto exit_put; | 809 | goto exit_put; |
810 | } | 810 | } |
811 | 811 | ||
812 | lock_super(sb); | 812 | mutex_lock(&sbi->s_resize_lock); |
813 | if (input->group != sbi->s_groups_count) { | 813 | if (input->group != sbi->s_groups_count) { |
814 | ext4_warning(sb, __func__, | 814 | ext4_warning(sb, __func__, |
815 | "multiple resizers run on filesystem!"); | 815 | "multiple resizers run on filesystem!"); |
@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
840 | /* | 840 | /* |
841 | * OK, now we've set up the new group. Time to make it active. | 841 | * OK, now we've set up the new group. Time to make it active. |
842 | * | 842 | * |
843 | * Current kernels don't lock all allocations via lock_super(), | 843 | * We do not lock all allocations via s_resize_lock |
844 | * so we have to be safe wrt. concurrent accesses to the group | 844 | * so we have to be safe wrt. concurrent accesses to the group |
845 | * data. So we need to be careful to set all of the relevant | 845 | * data. So we need to be careful to set all of the relevant |
846 | * group descriptor data etc. *before* we enable the group. | 846 | * group descriptor data etc. *before* we enable the group. |
@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
900 | * | 900 | * |
901 | * The precise rules we use are: | 901 | * The precise rules we use are: |
902 | * | 902 | * |
903 | * * Writers of s_groups_count *must* hold lock_super | 903 | * * Writers of s_groups_count *must* hold s_resize_lock |
904 | * AND | 904 | * AND |
905 | * * Writers must perform a smp_wmb() after updating all dependent | 905 | * * Writers must perform a smp_wmb() after updating all dependent |
906 | * data and before modifying the groups count | 906 | * data and before modifying the groups count |
907 | * | 907 | * |
908 | * * Readers must hold lock_super() over the access | 908 | * * Readers must hold s_resize_lock over the access |
909 | * OR | 909 | * OR |
910 | * * Readers must perform an smp_rmb() after reading the groups count | 910 | * * Readers must perform an smp_rmb() after reading the groups count |
911 | * and before reading any dependent data. | 911 | * and before reading any dependent data. |
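Given these rules, the ext4_get_groups_count() helper that the mballoc hunks above switched to is presumably the smp_rmb() flavor of reader, along these lines:

	static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
	{
		ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;

		smp_rmb();	/* pairs with the writer's smp_wmb() above */
		return ngroups;
	}

Callers such as ext4_mb_regular_allocator() read the count once per operation, so the worst a concurrent resize can do is leave the newly added groups unscanned for that one allocation.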
@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
948 | sb->s_dirt = 1; | 948 | sb->s_dirt = 1; |
949 | 949 | ||
950 | exit_journal: | 950 | exit_journal: |
951 | unlock_super(sb); | 951 | mutex_unlock(&sbi->s_resize_lock); |
952 | if ((err2 = ext4_journal_stop(handle)) && !err) | 952 | if ((err2 = ext4_journal_stop(handle)) && !err) |
953 | err = err2; | 953 | err = err2; |
954 | if (!err) { | 954 | if (!err) { |
@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
986 | 986 | ||
987 | /* We don't need to worry about locking wrt other resizers just | 987 | /* We don't need to worry about locking wrt other resizers just |
988 | * yet: we're going to revalidate es->s_blocks_count after | 988 | * yet: we're going to revalidate es->s_blocks_count after |
989 | * taking lock_super() below. */ | 989 | * taking the s_resize_lock below. */ |
990 | o_blocks_count = ext4_blocks_count(es); | 990 | o_blocks_count = ext4_blocks_count(es); |
991 | o_groups_count = EXT4_SB(sb)->s_groups_count; | 991 | o_groups_count = EXT4_SB(sb)->s_groups_count; |
992 | 992 | ||
@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1056 | goto exit_put; | 1056 | goto exit_put; |
1057 | } | 1057 | } |
1058 | 1058 | ||
1059 | lock_super(sb); | 1059 | mutex_lock(&EXT4_SB(sb)->s_resize_lock); |
1060 | if (o_blocks_count != ext4_blocks_count(es)) { | 1060 | if (o_blocks_count != ext4_blocks_count(es)) { |
1061 | ext4_warning(sb, __func__, | 1061 | ext4_warning(sb, __func__, |
1062 | "multiple resizers run on filesystem!"); | 1062 | "multiple resizers run on filesystem!"); |
1063 | unlock_super(sb); | 1063 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1064 | ext4_journal_stop(handle); | 1064 | ext4_journal_stop(handle); |
1065 | err = -EBUSY; | 1065 | err = -EBUSY; |
1066 | goto exit_put; | 1066 | goto exit_put; |
@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1070 | EXT4_SB(sb)->s_sbh))) { | 1070 | EXT4_SB(sb)->s_sbh))) { |
1071 | ext4_warning(sb, __func__, | 1071 | ext4_warning(sb, __func__, |
1072 | "error %d on journal write access", err); | 1072 | "error %d on journal write access", err); |
1073 | unlock_super(sb); | 1073 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1074 | ext4_journal_stop(handle); | 1074 | ext4_journal_stop(handle); |
1075 | goto exit_put; | 1075 | goto exit_put; |
1076 | } | 1076 | } |
1077 | ext4_blocks_count_set(es, o_blocks_count + add); | 1077 | ext4_blocks_count_set(es, o_blocks_count + add); |
1078 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | 1078 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); |
1079 | sb->s_dirt = 1; | 1079 | sb->s_dirt = 1; |
1080 | unlock_super(sb); | 1080 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1081 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1081 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1082 | o_blocks_count + add); | 1082 | o_blocks_count + add); |
1083 | /* We add the blocks to the bitmap and set the group need init bit */ | 1083 | /* We add the blocks to the bitmap and set the group need init bit */ |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2958f4e6f222..012c4251397e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/string.h> | 20 | #include <linux/string.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
23 | #include <linux/vmalloc.h> | ||
23 | #include <linux/jbd2.h> | 24 | #include <linux/jbd2.h> |
24 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
25 | #include <linux/init.h> | 26 | #include <linux/init.h> |
@@ -45,16 +46,20 @@ | |||
45 | #include "ext4_jbd2.h" | 46 | #include "ext4_jbd2.h" |
46 | #include "xattr.h" | 47 | #include "xattr.h" |
47 | #include "acl.h" | 48 | #include "acl.h" |
48 | #include "namei.h" | 49 | |
49 | #include "group.h" | 50 | static int default_mb_history_length = 1000; |
51 | |||
52 | module_param_named(default_mb_history_length, default_mb_history_length, | ||
53 | int, 0644); | ||
54 | MODULE_PARM_DESC(default_mb_history_length, | ||
55 | "Default number of entries saved for mb_history"); | ||
50 | 56 | ||
51 | struct proc_dir_entry *ext4_proc_root; | 57 | struct proc_dir_entry *ext4_proc_root; |
52 | static struct kset *ext4_kset; | 58 | static struct kset *ext4_kset; |
53 | 59 | ||
54 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 60 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
55 | unsigned long journal_devnum); | 61 | unsigned long journal_devnum); |
56 | static int ext4_commit_super(struct super_block *sb, | 62 | static int ext4_commit_super(struct super_block *sb, int sync); |
57 | struct ext4_super_block *es, int sync); | ||
58 | static void ext4_mark_recovery_complete(struct super_block *sb, | 63 | static void ext4_mark_recovery_complete(struct super_block *sb, |
59 | struct ext4_super_block *es); | 64 | struct ext4_super_block *es); |
60 | static void ext4_clear_journal_err(struct super_block *sb, | 65 | static void ext4_clear_journal_err(struct super_block *sb, |
@@ -74,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, | |||
74 | { | 79 | { |
75 | return le32_to_cpu(bg->bg_block_bitmap_lo) | | 80 | return le32_to_cpu(bg->bg_block_bitmap_lo) | |
76 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 81 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
77 | (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); | 82 | (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); |
78 | } | 83 | } |
79 | 84 | ||
80 | ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, | 85 | ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, |
@@ -82,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, | |||
82 | { | 87 | { |
83 | return le32_to_cpu(bg->bg_inode_bitmap_lo) | | 88 | return le32_to_cpu(bg->bg_inode_bitmap_lo) | |
84 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 89 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
85 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); | 90 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); |
86 | } | 91 | } |
87 | 92 | ||
88 | ext4_fsblk_t ext4_inode_table(struct super_block *sb, | 93 | ext4_fsblk_t ext4_inode_table(struct super_block *sb, |
@@ -90,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, | |||
90 | { | 95 | { |
91 | return le32_to_cpu(bg->bg_inode_table_lo) | | 96 | return le32_to_cpu(bg->bg_inode_table_lo) | |
92 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 97 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
93 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); | 98 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); |
94 | } | 99 | } |
95 | 100 | ||
96 | __u32 ext4_free_blks_count(struct super_block *sb, | 101 | __u32 ext4_free_blks_count(struct super_block *sb, |
@@ -98,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb, | |||
98 | { | 103 | { |
99 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | | 104 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | |
100 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 105 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
101 | (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); | 106 | (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); |
102 | } | 107 | } |
103 | 108 | ||
104 | __u32 ext4_free_inodes_count(struct super_block *sb, | 109 | __u32 ext4_free_inodes_count(struct super_block *sb, |
@@ -106,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb, | |||
106 | { | 111 | { |
107 | return le16_to_cpu(bg->bg_free_inodes_count_lo) | | 112 | return le16_to_cpu(bg->bg_free_inodes_count_lo) | |
108 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 113 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
109 | (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); | 114 | (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); |
110 | } | 115 | } |
111 | 116 | ||
112 | __u32 ext4_used_dirs_count(struct super_block *sb, | 117 | __u32 ext4_used_dirs_count(struct super_block *sb, |
@@ -114,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb, | |||
114 | { | 119 | { |
115 | return le16_to_cpu(bg->bg_used_dirs_count_lo) | | 120 | return le16_to_cpu(bg->bg_used_dirs_count_lo) | |
116 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 121 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
117 | (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); | 122 | (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); |
118 | } | 123 | } |
119 | 124 | ||
120 | __u32 ext4_itable_unused_count(struct super_block *sb, | 125 | __u32 ext4_itable_unused_count(struct super_block *sb, |
@@ -122,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb, | |||
122 | { | 127 | { |
123 | return le16_to_cpu(bg->bg_itable_unused_lo) | | 128 | return le16_to_cpu(bg->bg_itable_unused_lo) | |
124 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 129 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
125 | (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); | 130 | (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); |
126 | } | 131 | } |
127 | 132 | ||
128 | void ext4_block_bitmap_set(struct super_block *sb, | 133 | void ext4_block_bitmap_set(struct super_block *sb, |
@@ -202,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
202 | journal = EXT4_SB(sb)->s_journal; | 207 | journal = EXT4_SB(sb)->s_journal; |
203 | if (journal) { | 208 | if (journal) { |
204 | if (is_journal_aborted(journal)) { | 209 | if (is_journal_aborted(journal)) { |
205 | ext4_abort(sb, __func__, | 210 | ext4_abort(sb, __func__, "Detected aborted journal"); |
206 | "Detected aborted journal"); | ||
207 | return ERR_PTR(-EROFS); | 211 | return ERR_PTR(-EROFS); |
208 | } | 212 | } |
209 | return jbd2_journal_start(journal, nblocks); | 213 | return jbd2_journal_start(journal, nblocks); |
@@ -302,10 +306,10 @@ static void ext4_handle_error(struct super_block *sb) | |||
302 | jbd2_journal_abort(journal, -EIO); | 306 | jbd2_journal_abort(journal, -EIO); |
303 | } | 307 | } |
304 | if (test_opt(sb, ERRORS_RO)) { | 308 | if (test_opt(sb, ERRORS_RO)) { |
305 | printk(KERN_CRIT "Remounting filesystem read-only\n"); | 309 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
306 | sb->s_flags |= MS_RDONLY; | 310 | sb->s_flags |= MS_RDONLY; |
307 | } | 311 | } |
308 | ext4_commit_super(sb, es, 1); | 312 | ext4_commit_super(sb, 1); |
309 | if (test_opt(sb, ERRORS_PANIC)) | 313 | if (test_opt(sb, ERRORS_PANIC)) |
310 | panic("EXT4-fs (device %s): panic forced after error\n", | 314 | panic("EXT4-fs (device %s): panic forced after error\n", |
311 | sb->s_id); | 315 | sb->s_id); |
@@ -395,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function, | |||
395 | { | 399 | { |
396 | va_list args; | 400 | va_list args; |
397 | 401 | ||
398 | printk(KERN_CRIT "ext4_abort called.\n"); | ||
399 | |||
400 | va_start(args, fmt); | 402 | va_start(args, fmt); |
401 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | 403 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); |
402 | vprintk(fmt, args); | 404 | vprintk(fmt, args); |
@@ -409,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function, | |||
409 | if (sb->s_flags & MS_RDONLY) | 411 | if (sb->s_flags & MS_RDONLY) |
410 | return; | 412 | return; |
411 | 413 | ||
412 | printk(KERN_CRIT "Remounting filesystem read-only\n"); | 414 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
413 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 415 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
414 | sb->s_flags |= MS_RDONLY; | 416 | sb->s_flags |= MS_RDONLY; |
415 | EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; | 417 | EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; |
@@ -417,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function, | |||
417 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | 419 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); |
418 | } | 420 | } |
419 | 421 | ||
422 | void ext4_msg(struct super_block *sb, const char *prefix, | ||
423 | const char *fmt, ...) | ||
424 | { | ||
425 | va_list args; | ||
426 | |||
427 | va_start(args, fmt); | ||
428 | printk("%sEXT4-fs (%s): ", prefix, sb->s_id); | ||
429 | vprintk(fmt, args); | ||
430 | printk("\n"); | ||
431 | va_end(args); | ||
432 | } | ||
433 | |||
420 | void ext4_warning(struct super_block *sb, const char *function, | 434 | void ext4_warning(struct super_block *sb, const char *function, |
421 | const char *fmt, ...) | 435 | const char *fmt, ...) |
422 | { | 436 | { |
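ext4_msg() centralizes the "EXT4-fs (device):" prefix that the printk conversions in this patch were previously open-coding. The call shape, taken from the ext4_blkdev_get() hunk below:

	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
		 __bdevname(dev, b), PTR_ERR(bdev));

The log level is passed as the prefix string, so it lands in front of the "EXT4-fs" tag exactly where a bare printk would put it.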
@@ -431,7 +445,7 @@ void ext4_warning(struct super_block *sb, const char *function, | |||
431 | } | 445 | } |
432 | 446 | ||
433 | void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, | 447 | void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, |
434 | const char *function, const char *fmt, ...) | 448 | const char *function, const char *fmt, ...) |
435 | __releases(bitlock) | 449 | __releases(bitlock) |
436 | __acquires(bitlock) | 450 | __acquires(bitlock) |
437 | { | 451 | { |
@@ -447,7 +461,7 @@ __acquires(bitlock) | |||
447 | if (test_opt(sb, ERRORS_CONT)) { | 461 | if (test_opt(sb, ERRORS_CONT)) { |
448 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 462 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
449 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 463 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
450 | ext4_commit_super(sb, es, 0); | 464 | ext4_commit_super(sb, 0); |
451 | return; | 465 | return; |
452 | } | 466 | } |
453 | ext4_unlock_group(sb, grp); | 467 | ext4_unlock_group(sb, grp); |
@@ -467,7 +481,6 @@ __acquires(bitlock) | |||
467 | return; | 481 | return; |
468 | } | 482 | } |
469 | 483 | ||
470 | |||
471 | void ext4_update_dynamic_rev(struct super_block *sb) | 484 | void ext4_update_dynamic_rev(struct super_block *sb) |
472 | { | 485 | { |
473 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 486 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
@@ -496,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) | |||
496 | /* | 509 | /* |
497 | * Open the external journal device | 510 | * Open the external journal device |
498 | */ | 511 | */ |
499 | static struct block_device *ext4_blkdev_get(dev_t dev) | 512 | static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) |
500 | { | 513 | { |
501 | struct block_device *bdev; | 514 | struct block_device *bdev; |
502 | char b[BDEVNAME_SIZE]; | 515 | char b[BDEVNAME_SIZE]; |
@@ -507,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev) | |||
507 | return bdev; | 520 | return bdev; |
508 | 521 | ||
509 | fail: | 522 | fail: |
510 | printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", | 523 | ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", |
511 | __bdevname(dev, b), PTR_ERR(bdev)); | 524 | __bdevname(dev, b), PTR_ERR(bdev)); |
512 | return NULL; | 525 | return NULL; |
513 | } | 526 | } |
@@ -543,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) | |||
543 | { | 556 | { |
544 | struct list_head *l; | 557 | struct list_head *l; |
545 | 558 | ||
546 | printk(KERN_ERR "sb orphan head is %d\n", | 559 | ext4_msg(sb, KERN_ERR, "sb orphan head is %d", |
547 | le32_to_cpu(sbi->s_es->s_last_orphan)); | 560 | le32_to_cpu(sbi->s_es->s_last_orphan)); |
548 | 561 | ||
549 | printk(KERN_ERR "sb_info orphan list:\n"); | 562 | printk(KERN_ERR "sb_info orphan list:\n"); |
550 | list_for_each(l, &sbi->s_orphan) { | 563 | list_for_each(l, &sbi->s_orphan) { |
@@ -563,6 +576,12 @@ static void ext4_put_super(struct super_block *sb) | |||
563 | struct ext4_super_block *es = sbi->s_es; | 576 | struct ext4_super_block *es = sbi->s_es; |
564 | int i, err; | 577 | int i, err; |
565 | 578 | ||
579 | lock_super(sb); | ||
580 | lock_kernel(); | ||
581 | if (sb->s_dirt) | ||
582 | ext4_commit_super(sb, 1); | ||
583 | |||
584 | ext4_release_system_zone(sb); | ||
566 | ext4_mb_release(sb); | 585 | ext4_mb_release(sb); |
567 | ext4_ext_release(sb); | 586 | ext4_ext_release(sb); |
568 | ext4_xattr_put_super(sb); | 587 | ext4_xattr_put_super(sb); |
@@ -576,7 +595,7 @@ static void ext4_put_super(struct super_block *sb) | |||
576 | if (!(sb->s_flags & MS_RDONLY)) { | 595 | if (!(sb->s_flags & MS_RDONLY)) { |
577 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 596 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
578 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 597 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
579 | ext4_commit_super(sb, es, 1); | 598 | ext4_commit_super(sb, 1); |
580 | } | 599 | } |
581 | if (sbi->s_proc) { | 600 | if (sbi->s_proc) { |
582 | remove_proc_entry(sb->s_id, ext4_proc_root); | 601 | remove_proc_entry(sb->s_id, ext4_proc_root); |
@@ -586,7 +605,10 @@ static void ext4_put_super(struct super_block *sb) | |||
586 | for (i = 0; i < sbi->s_gdb_count; i++) | 605 | for (i = 0; i < sbi->s_gdb_count; i++) |
587 | brelse(sbi->s_group_desc[i]); | 606 | brelse(sbi->s_group_desc[i]); |
588 | kfree(sbi->s_group_desc); | 607 | kfree(sbi->s_group_desc); |
589 | kfree(sbi->s_flex_groups); | 608 | if (is_vmalloc_addr(sbi->s_flex_groups)) |
609 | vfree(sbi->s_flex_groups); | ||
610 | else | ||
611 | kfree(sbi->s_flex_groups); | ||
590 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 612 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
591 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 613 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
592 | percpu_counter_destroy(&sbi->s_dirs_counter); | 614 | percpu_counter_destroy(&sbi->s_dirs_counter); |
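The is_vmalloc_addr() test implies that the allocation side now falls back to vmalloc() when the flex-group array outgrows what kmalloc can provide (hence the new <linux/vmalloc.h> include at the top of super.c). A sketch of the assumed counterpart in the mount path:

	/* sketch: in the flex-group setup during mount (function name assumed) */
	size = flex_group_count * sizeof(struct flex_groups);
	sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
	if (sbi->s_flex_groups == NULL) {
		sbi->s_flex_groups = vmalloc(size);
		if (sbi->s_flex_groups)
			memset(sbi->s_flex_groups, 0, size);
	}

Freeing then has to ask which allocator succeeded, which is exactly what this hunk does.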
@@ -625,11 +647,8 @@ static void ext4_put_super(struct super_block *sb) | |||
625 | unlock_super(sb); | 647 | unlock_super(sb); |
626 | kobject_put(&sbi->s_kobj); | 648 | kobject_put(&sbi->s_kobj); |
627 | wait_for_completion(&sbi->s_kobj_unregister); | 649 | wait_for_completion(&sbi->s_kobj_unregister); |
628 | lock_super(sb); | ||
629 | lock_kernel(); | ||
630 | kfree(sbi->s_blockgroup_lock); | 650 | kfree(sbi->s_blockgroup_lock); |
631 | kfree(sbi); | 651 | kfree(sbi); |
632 | return; | ||
633 | } | 652 | } |
634 | 653 | ||
635 | static struct kmem_cache *ext4_inode_cachep; | 654 | static struct kmem_cache *ext4_inode_cachep; |
@@ -644,6 +663,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
644 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); | 663 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); |
645 | if (!ei) | 664 | if (!ei) |
646 | return NULL; | 665 | return NULL; |
666 | |||
647 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 667 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
648 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 668 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
649 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 669 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
@@ -664,14 +684,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
664 | ei->i_allocated_meta_blocks = 0; | 684 | ei->i_allocated_meta_blocks = 0; |
665 | ei->i_delalloc_reserved_flag = 0; | 685 | ei->i_delalloc_reserved_flag = 0; |
666 | spin_lock_init(&(ei->i_block_reservation_lock)); | 686 | spin_lock_init(&(ei->i_block_reservation_lock)); |
687 | |||
667 | return &ei->vfs_inode; | 688 | return &ei->vfs_inode; |
668 | } | 689 | } |
669 | 690 | ||
670 | static void ext4_destroy_inode(struct inode *inode) | 691 | static void ext4_destroy_inode(struct inode *inode) |
671 | { | 692 | { |
672 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 693 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
673 | printk("EXT4 Inode %p: orphan list check failed!\n", | 694 | ext4_msg(inode->i_sb, KERN_ERR, |
674 | EXT4_I(inode)); | 695 | "Inode %lu (%p): orphan list check failed!", |
696 | inode->i_ino, EXT4_I(inode)); | ||
675 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, | 697 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, |
676 | EXT4_I(inode), sizeof(struct ext4_inode_info), | 698 | EXT4_I(inode), sizeof(struct ext4_inode_info), |
677 | true); | 699 | true); |
@@ -870,12 +892,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
870 | seq_puts(seq, ",noauto_da_alloc"); | 892 | seq_puts(seq, ",noauto_da_alloc"); |
871 | 893 | ||
872 | ext4_show_quota_options(seq, sb); | 894 | ext4_show_quota_options(seq, sb); |
895 | |||
873 | return 0; | 896 | return 0; |
874 | } | 897 | } |
875 | 898 | ||
876 | |||
877 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, | 899 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, |
878 | u64 ino, u32 generation) | 900 | u64 ino, u32 generation) |
879 | { | 901 | { |
880 | struct inode *inode; | 902 | struct inode *inode; |
881 | 903 | ||
@@ -904,14 +926,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, | |||
904 | } | 926 | } |
905 | 927 | ||
906 | static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, | 928 | static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, |
907 | int fh_len, int fh_type) | 929 | int fh_len, int fh_type) |
908 | { | 930 | { |
909 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | 931 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
910 | ext4_nfs_get_inode); | 932 | ext4_nfs_get_inode); |
911 | } | 933 | } |
912 | 934 | ||
913 | static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | 935 | static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, |
914 | int fh_len, int fh_type) | 936 | int fh_len, int fh_type) |
915 | { | 937 | { |
916 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | 938 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, |
917 | ext4_nfs_get_inode); | 939 | ext4_nfs_get_inode); |
@@ -923,7 +945,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
923 | * which would prevent try_to_free_buffers() from freeing them, we must use | 945 | * which would prevent try_to_free_buffers() from freeing them, we must use |
924 | * jbd2 layer's try_to_free_buffers() function to release them. | 946 | * jbd2 layer's try_to_free_buffers() function to release them. |
925 | */ | 947 | */ |
926 | static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) | 948 | static int bdev_try_to_free_page(struct super_block *sb, struct page *page, |
949 | gfp_t wait) | ||
927 | { | 950 | { |
928 | journal_t *journal = EXT4_SB(sb)->s_journal; | 951 | journal_t *journal = EXT4_SB(sb)->s_journal; |
929 | 952 | ||
@@ -992,7 +1015,6 @@ static const struct super_operations ext4_sops = { | |||
992 | .dirty_inode = ext4_dirty_inode, | 1015 | .dirty_inode = ext4_dirty_inode, |
993 | .delete_inode = ext4_delete_inode, | 1016 | .delete_inode = ext4_delete_inode, |
994 | .put_super = ext4_put_super, | 1017 | .put_super = ext4_put_super, |
995 | .write_super = ext4_write_super, | ||
996 | .sync_fs = ext4_sync_fs, | 1018 | .sync_fs = ext4_sync_fs, |
997 | .freeze_fs = ext4_freeze, | 1019 | .freeze_fs = ext4_freeze, |
998 | .unfreeze_fs = ext4_unfreeze, | 1020 | .unfreeze_fs = ext4_unfreeze, |
@@ -1007,6 +1029,25 @@ static const struct super_operations ext4_sops = { | |||
1007 | .bdev_try_to_free_page = bdev_try_to_free_page, | 1029 | .bdev_try_to_free_page = bdev_try_to_free_page, |
1008 | }; | 1030 | }; |
1009 | 1031 | ||
1032 | static const struct super_operations ext4_nojournal_sops = { | ||
1033 | .alloc_inode = ext4_alloc_inode, | ||
1034 | .destroy_inode = ext4_destroy_inode, | ||
1035 | .write_inode = ext4_write_inode, | ||
1036 | .dirty_inode = ext4_dirty_inode, | ||
1037 | .delete_inode = ext4_delete_inode, | ||
1038 | .write_super = ext4_write_super, | ||
1039 | .put_super = ext4_put_super, | ||
1040 | .statfs = ext4_statfs, | ||
1041 | .remount_fs = ext4_remount, | ||
1042 | .clear_inode = ext4_clear_inode, | ||
1043 | .show_options = ext4_show_options, | ||
1044 | #ifdef CONFIG_QUOTA | ||
1045 | .quota_read = ext4_quota_read, | ||
1046 | .quota_write = ext4_quota_write, | ||
1047 | #endif | ||
1048 | .bdev_try_to_free_page = bdev_try_to_free_page, | ||
1049 | }; | ||
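Splitting out ext4_nojournal_sops lets no-journal filesystems keep .write_super (so the VFS still writes back a dirty superblock periodically) while journaled mounts drop it and rely on the commit path plus the explicit ext4_commit_super() calls added above. The mount code presumably selects the table based on the journal feature, e.g.:

	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
		sb->s_op = &ext4_sops;
	else
		sb->s_op = &ext4_nojournal_sops;

which matches the removal of .write_super from ext4_sops in the previous hunk.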
1050 | |||
1010 | static const struct export_operations ext4_export_ops = { | 1051 | static const struct export_operations ext4_export_ops = { |
1011 | .fh_to_dentry = ext4_fh_to_dentry, | 1052 | .fh_to_dentry = ext4_fh_to_dentry, |
1012 | .fh_to_parent = ext4_fh_to_parent, | 1053 | .fh_to_parent = ext4_fh_to_parent, |
@@ -1023,12 +1064,13 @@ enum { | |||
1023 | Opt_journal_update, Opt_journal_dev, | 1064 | Opt_journal_update, Opt_journal_dev, |
1024 | Opt_journal_checksum, Opt_journal_async_commit, | 1065 | Opt_journal_checksum, Opt_journal_async_commit, |
1025 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1066 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1026 | Opt_data_err_abort, Opt_data_err_ignore, | 1067 | Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, |
1027 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1068 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1028 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 1069 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
1029 | Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, | 1070 | Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, |
1030 | Opt_usrquota, Opt_grpquota, Opt_i_version, | 1071 | Opt_usrquota, Opt_grpquota, Opt_i_version, |
1031 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 1072 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
1073 | Opt_block_validity, Opt_noblock_validity, | ||
1032 | Opt_inode_readahead_blks, Opt_journal_ioprio | 1074 | Opt_inode_readahead_blks, Opt_journal_ioprio |
1033 | }; | 1075 | }; |
1034 | 1076 | ||
@@ -1069,6 +1111,7 @@ static const match_table_t tokens = { | |||
1069 | {Opt_data_writeback, "data=writeback"}, | 1111 | {Opt_data_writeback, "data=writeback"}, |
1070 | {Opt_data_err_abort, "data_err=abort"}, | 1112 | {Opt_data_err_abort, "data_err=abort"}, |
1071 | {Opt_data_err_ignore, "data_err=ignore"}, | 1113 | {Opt_data_err_ignore, "data_err=ignore"}, |
1114 | {Opt_mb_history_length, "mb_history_length=%u"}, | ||
1072 | {Opt_offusrjquota, "usrjquota="}, | 1115 | {Opt_offusrjquota, "usrjquota="}, |
1073 | {Opt_usrjquota, "usrjquota=%s"}, | 1116 | {Opt_usrjquota, "usrjquota=%s"}, |
1074 | {Opt_offgrpjquota, "grpjquota="}, | 1117 | {Opt_offgrpjquota, "grpjquota="}, |
@@ -1087,6 +1130,8 @@ static const match_table_t tokens = { | |||
1087 | {Opt_resize, "resize"}, | 1130 | {Opt_resize, "resize"}, |
1088 | {Opt_delalloc, "delalloc"}, | 1131 | {Opt_delalloc, "delalloc"}, |
1089 | {Opt_nodelalloc, "nodelalloc"}, | 1132 | {Opt_nodelalloc, "nodelalloc"}, |
1133 | {Opt_block_validity, "block_validity"}, | ||
1134 | {Opt_noblock_validity, "noblock_validity"}, | ||
1090 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | 1135 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, |
1091 | {Opt_journal_ioprio, "journal_ioprio=%u"}, | 1136 | {Opt_journal_ioprio, "journal_ioprio=%u"}, |
1092 | {Opt_auto_da_alloc, "auto_da_alloc=%u"}, | 1137 | {Opt_auto_da_alloc, "auto_da_alloc=%u"}, |
@@ -1102,8 +1147,9 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
1102 | 1147 | ||
1103 | if (!options || strncmp(options, "sb=", 3) != 0) | 1148 | if (!options || strncmp(options, "sb=", 3) != 0) |
1104 | return 1; /* Default location */ | 1149 | return 1; /* Default location */ |
1150 | |||
1105 | options += 3; | 1151 | options += 3; |
1106 | /*todo: use simple_strtoll with >32bit ext4 */ | 1152 | /* TODO: use simple_strtoll with >32bit ext4 */ |
1107 | sb_block = simple_strtoul(options, &options, 0); | 1153 | sb_block = simple_strtoul(options, &options, 0); |
1108 | if (*options && *options != ',') { | 1154 | if (*options && *options != ',') { |
1109 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", | 1155 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", |
@@ -1113,6 +1159,7 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
1113 | if (*options == ',') | 1159 | if (*options == ',') |
1114 | options++; | 1160 | options++; |
1115 | *data = (void *) options; | 1161 | *data = (void *) options; |
1162 | |||
1116 | return sb_block; | 1163 | return sb_block; |
1117 | } | 1164 | } |
1118 | 1165 | ||
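get_sb_block() peels a leading "sb=N" specifier off the mount option string, leaving the remainder for parse_options(). An illustrative call (values invented for the example):

    char opts[] = "sb=8193,noload";
    void *data = opts;
    ext4_fsblk_t sb_block = get_sb_block(&data);
    /* sb_block == 8193; data now points at "noload", which
     * parse_options() consumes.  With no leading "sb=", the
     * function returns 1, the default superblock location. */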
@@ -1206,8 +1253,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1206 | #else | 1253 | #else |
1207 | case Opt_user_xattr: | 1254 | case Opt_user_xattr: |
1208 | case Opt_nouser_xattr: | 1255 | case Opt_nouser_xattr: |
1209 | printk(KERN_ERR "EXT4 (no)user_xattr options " | 1256 | ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); |
1210 | "not supported\n"); | ||
1211 | break; | 1257 | break; |
1212 | #endif | 1258 | #endif |
1213 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1259 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
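These call sites rely on ext4_msg(), which is added elsewhere in this patch and not shown in this section. A sketch of a plausible minimal definition, assuming it prefixes the log level, an "EXT4-fs" tag and the device name before the formatted message -- which is why the converted strings drop both the "EXT4-fs:" prefix and the trailing newline:

    void ext4_msg(struct super_block *sb, const char *prefix,
                  const char *fmt, ...)
    {
            va_list args;

            va_start(args, fmt);
            /* e.g. "<3>EXT4-fs (sda1): Cannot specify journal on remount" */
            printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
            vprintk(fmt, args);
            printk("\n");
            va_end(args);
    }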
@@ -1220,8 +1266,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1220 | #else | 1266 | #else |
1221 | case Opt_acl: | 1267 | case Opt_acl: |
1222 | case Opt_noacl: | 1268 | case Opt_noacl: |
1223 | printk(KERN_ERR "EXT4 (no)acl options " | 1269 | ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); |
1224 | "not supported\n"); | ||
1225 | break; | 1270 | break; |
1226 | #endif | 1271 | #endif |
1227 | case Opt_journal_update: | 1272 | case Opt_journal_update: |
@@ -1231,16 +1276,16 @@ static int parse_options(char *options, struct super_block *sb, | |||
1231 | user to specify an existing inode to be the | 1276 | user to specify an existing inode to be the |
1232 | journal file. */ | 1277 | journal file. */ |
1233 | if (is_remount) { | 1278 | if (is_remount) { |
1234 | printk(KERN_ERR "EXT4-fs: cannot specify " | 1279 | ext4_msg(sb, KERN_ERR, |
1235 | "journal on remount\n"); | 1280 | "Cannot specify journal on remount"); |
1236 | return 0; | 1281 | return 0; |
1237 | } | 1282 | } |
1238 | set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); | 1283 | set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); |
1239 | break; | 1284 | break; |
1240 | case Opt_journal_dev: | 1285 | case Opt_journal_dev: |
1241 | if (is_remount) { | 1286 | if (is_remount) { |
1242 | printk(KERN_ERR "EXT4-fs: cannot specify " | 1287 | ext4_msg(sb, KERN_ERR, |
1243 | "journal on remount\n"); | 1288 | "Cannot specify journal on remount"); |
1244 | return 0; | 1289 | return 0; |
1245 | } | 1290 | } |
1246 | if (match_int(&args[0], &option)) | 1291 | if (match_int(&args[0], &option)) |
@@ -1294,9 +1339,8 @@ static int parse_options(char *options, struct super_block *sb, | |||
1294 | if (is_remount) { | 1339 | if (is_remount) { |
1295 | if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) | 1340 | if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) |
1296 | != data_opt) { | 1341 | != data_opt) { |
1297 | printk(KERN_ERR | 1342 | ext4_msg(sb, KERN_ERR, |
1298 | "EXT4-fs: cannot change data " | 1343 | "Cannot change data mode on remount"); |
1299 | "mode on remount\n"); | ||
1300 | return 0; | 1344 | return 0; |
1301 | } | 1345 | } |
1302 | } else { | 1346 | } else { |
@@ -1310,6 +1354,13 @@ static int parse_options(char *options, struct super_block *sb, | |||
1310 | case Opt_data_err_ignore: | 1354 | case Opt_data_err_ignore: |
1311 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | 1355 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); |
1312 | break; | 1356 | break; |
1357 | case Opt_mb_history_length: | ||
1358 | if (match_int(&args[0], &option)) | ||
1359 | return 0; | ||
1360 | if (option < 0) | ||
1361 | return 0; | ||
1362 | sbi->s_mb_history_max = option; | ||
1363 | break; | ||
1313 | #ifdef CONFIG_QUOTA | 1364 | #ifdef CONFIG_QUOTA |
1314 | case Opt_usrjquota: | 1365 | case Opt_usrjquota: |
1315 | qtype = USRQUOTA; | 1366 | qtype = USRQUOTA; |
@@ -1319,31 +1370,31 @@ static int parse_options(char *options, struct super_block *sb, | |||
1319 | set_qf_name: | 1370 | set_qf_name: |
1320 | if (sb_any_quota_loaded(sb) && | 1371 | if (sb_any_quota_loaded(sb) && |
1321 | !sbi->s_qf_names[qtype]) { | 1372 | !sbi->s_qf_names[qtype]) { |
1322 | printk(KERN_ERR | 1373 | ext4_msg(sb, KERN_ERR, |
1323 | "EXT4-fs: Cannot change journaled " | 1374 | "Cannot change journaled " |
1324 | "quota options when quota turned on.\n"); | 1375 | "quota options when quota turned on"); |
1325 | return 0; | 1376 | return 0; |
1326 | } | 1377 | } |
1327 | qname = match_strdup(&args[0]); | 1378 | qname = match_strdup(&args[0]); |
1328 | if (!qname) { | 1379 | if (!qname) { |
1329 | printk(KERN_ERR | 1380 | ext4_msg(sb, KERN_ERR, |
1330 | "EXT4-fs: not enough memory for " | 1381 | "Not enough memory for " |
1331 | "storing quotafile name.\n"); | 1382 | "storing quotafile name"); |
1332 | return 0; | 1383 | return 0; |
1333 | } | 1384 | } |
1334 | if (sbi->s_qf_names[qtype] && | 1385 | if (sbi->s_qf_names[qtype] && |
1335 | strcmp(sbi->s_qf_names[qtype], qname)) { | 1386 | strcmp(sbi->s_qf_names[qtype], qname)) { |
1336 | printk(KERN_ERR | 1387 | ext4_msg(sb, KERN_ERR, |
1337 | "EXT4-fs: %s quota file already " | 1388 | "%s quota file already " |
1338 | "specified.\n", QTYPE2NAME(qtype)); | 1389 | "specified", QTYPE2NAME(qtype)); |
1339 | kfree(qname); | 1390 | kfree(qname); |
1340 | return 0; | 1391 | return 0; |
1341 | } | 1392 | } |
1342 | sbi->s_qf_names[qtype] = qname; | 1393 | sbi->s_qf_names[qtype] = qname; |
1343 | if (strchr(sbi->s_qf_names[qtype], '/')) { | 1394 | if (strchr(sbi->s_qf_names[qtype], '/')) { |
1344 | printk(KERN_ERR | 1395 | ext4_msg(sb, KERN_ERR, |
1345 | "EXT4-fs: quotafile must be on " | 1396 | "quotafile must be on " |
1346 | "filesystem root.\n"); | 1397 | "filesystem root"); |
1347 | kfree(sbi->s_qf_names[qtype]); | 1398 | kfree(sbi->s_qf_names[qtype]); |
1348 | sbi->s_qf_names[qtype] = NULL; | 1399 | sbi->s_qf_names[qtype] = NULL; |
1349 | return 0; | 1400 | return 0; |
@@ -1358,9 +1409,9 @@ set_qf_name: | |||
1358 | clear_qf_name: | 1409 | clear_qf_name: |
1359 | if (sb_any_quota_loaded(sb) && | 1410 | if (sb_any_quota_loaded(sb) && |
1360 | sbi->s_qf_names[qtype]) { | 1411 | sbi->s_qf_names[qtype]) { |
1361 | printk(KERN_ERR "EXT4-fs: Cannot change " | 1412 | ext4_msg(sb, KERN_ERR, "Cannot change " |
1362 | "journaled quota options when " | 1413 | "journaled quota options when " |
1363 | "quota turned on.\n"); | 1414 | "quota turned on"); |
1364 | return 0; | 1415 | return 0; |
1365 | } | 1416 | } |
1366 | /* | 1417 | /* |
@@ -1377,9 +1428,9 @@ clear_qf_name: | |||
1377 | set_qf_format: | 1428 | set_qf_format: |
1378 | if (sb_any_quota_loaded(sb) && | 1429 | if (sb_any_quota_loaded(sb) && |
1379 | sbi->s_jquota_fmt != qfmt) { | 1430 | sbi->s_jquota_fmt != qfmt) { |
1380 | printk(KERN_ERR "EXT4-fs: Cannot change " | 1431 | ext4_msg(sb, KERN_ERR, "Cannot change " |
1381 | "journaled quota options when " | 1432 | "journaled quota options when " |
1382 | "quota turned on.\n"); | 1433 | "quota turned on"); |
1383 | return 0; | 1434 | return 0; |
1384 | } | 1435 | } |
1385 | sbi->s_jquota_fmt = qfmt; | 1436 | sbi->s_jquota_fmt = qfmt; |
@@ -1395,8 +1446,8 @@ set_qf_format: | |||
1395 | break; | 1446 | break; |
1396 | case Opt_noquota: | 1447 | case Opt_noquota: |
1397 | if (sb_any_quota_loaded(sb)) { | 1448 | if (sb_any_quota_loaded(sb)) { |
1398 | printk(KERN_ERR "EXT4-fs: Cannot change quota " | 1449 | ext4_msg(sb, KERN_ERR, "Cannot change quota " |
1399 | "options when quota turned on.\n"); | 1450 | "options when quota turned on"); |
1400 | return 0; | 1451 | return 0; |
1401 | } | 1452 | } |
1402 | clear_opt(sbi->s_mount_opt, QUOTA); | 1453 | clear_opt(sbi->s_mount_opt, QUOTA); |
@@ -1407,8 +1458,8 @@ set_qf_format: | |||
1407 | case Opt_quota: | 1458 | case Opt_quota: |
1408 | case Opt_usrquota: | 1459 | case Opt_usrquota: |
1409 | case Opt_grpquota: | 1460 | case Opt_grpquota: |
1410 | printk(KERN_ERR | 1461 | ext4_msg(sb, KERN_ERR, |
1411 | "EXT4-fs: quota options not supported.\n"); | 1462 | "quota options not supported"); |
1412 | break; | 1463 | break; |
1413 | case Opt_usrjquota: | 1464 | case Opt_usrjquota: |
1414 | case Opt_grpjquota: | 1465 | case Opt_grpjquota: |
@@ -1416,9 +1467,8 @@ set_qf_format: | |||
1416 | case Opt_offgrpjquota: | 1467 | case Opt_offgrpjquota: |
1417 | case Opt_jqfmt_vfsold: | 1468 | case Opt_jqfmt_vfsold: |
1418 | case Opt_jqfmt_vfsv0: | 1469 | case Opt_jqfmt_vfsv0: |
1419 | printk(KERN_ERR | 1470 | ext4_msg(sb, KERN_ERR, |
1420 | "EXT4-fs: journaled quota options not " | 1471 | "journaled quota options not supported"); |
1421 | "supported.\n"); | ||
1422 | break; | 1472 | break; |
1423 | case Opt_noquota: | 1473 | case Opt_noquota: |
1424 | break; | 1474 | break; |
@@ -1443,8 +1493,9 @@ set_qf_format: | |||
1443 | break; | 1493 | break; |
1444 | case Opt_resize: | 1494 | case Opt_resize: |
1445 | if (!is_remount) { | 1495 | if (!is_remount) { |
1446 | printk("EXT4-fs: resize option only available " | 1496 | ext4_msg(sb, KERN_ERR, |
1447 | "for remount\n"); | 1497 | "resize option only available " |
1498 | "for remount"); | ||
1448 | return 0; | 1499 | return 0; |
1449 | } | 1500 | } |
1450 | if (match_int(&args[0], &option) != 0) | 1501 | if (match_int(&args[0], &option) != 0) |
@@ -1474,14 +1525,21 @@ set_qf_format: | |||
1474 | case Opt_delalloc: | 1525 | case Opt_delalloc: |
1475 | set_opt(sbi->s_mount_opt, DELALLOC); | 1526 | set_opt(sbi->s_mount_opt, DELALLOC); |
1476 | break; | 1527 | break; |
1528 | case Opt_block_validity: | ||
1529 | set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | ||
1530 | break; | ||
1531 | case Opt_noblock_validity: | ||
1532 | clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | ||
1533 | break; | ||
1477 | case Opt_inode_readahead_blks: | 1534 | case Opt_inode_readahead_blks: |
1478 | if (match_int(&args[0], &option)) | 1535 | if (match_int(&args[0], &option)) |
1479 | return 0; | 1536 | return 0; |
1480 | if (option < 0 || option > (1 << 30)) | 1537 | if (option < 0 || option > (1 << 30)) |
1481 | return 0; | 1538 | return 0; |
1482 | if (option & (option - 1)) { | 1539 | if (!is_power_of_2(option)) { |
1483 | printk(KERN_ERR "EXT4-fs: inode_readahead_blks" | 1540 | ext4_msg(sb, KERN_ERR, |
1484 | " must be a power of 2\n"); | 1541 | "EXT4-fs: inode_readahead_blks" |
1542 | " must be a power of 2"); | ||
1485 | return 0; | 1543 | return 0; |
1486 | } | 1544 | } |
1487 | sbi->s_inode_readahead_blks = option; | 1545 | sbi->s_inode_readahead_blks = option; |
@@ -1508,9 +1566,9 @@ set_qf_format: | |||
1508 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); | 1566 | set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); |
1509 | break; | 1567 | break; |
1510 | default: | 1568 | default: |
1511 | printk(KERN_ERR | 1569 | ext4_msg(sb, KERN_ERR, |
1512 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1570 | "Unrecognized mount option \"%s\" " |
1513 | "or missing value\n", p); | 1571 | "or missing value", p); |
1514 | return 0; | 1572 | return 0; |
1515 | } | 1573 | } |
1516 | } | 1574 | } |
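Both this hunk and the sysfs store routine later in the patch replace the open-coded option & (option - 1) test with is_power_of_2(). The two are equivalent for non-zero values: a power of two has exactly one bit set, and subtracting one clears that bit while setting everything below it. A sketch matching the kernel helper in include/linux/log2.h:

    static inline bool is_power_of_2(unsigned long n)
    {
            return n != 0 && ((n & (n - 1)) == 0);
    }
    /* e.g. 16 & 15 == 0  (power of two);  12 & 11 == 8  (not). */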
@@ -1528,21 +1586,21 @@ set_qf_format: | |||
1528 | (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || | 1586 | (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || |
1529 | (sbi->s_qf_names[GRPQUOTA] && | 1587 | (sbi->s_qf_names[GRPQUOTA] && |
1530 | (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { | 1588 | (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { |
1531 | printk(KERN_ERR "EXT4-fs: old and new quota " | 1589 | ext4_msg(sb, KERN_ERR, "old and new quota " |
1532 | "format mixing.\n"); | 1590 | "format mixing"); |
1533 | return 0; | 1591 | return 0; |
1534 | } | 1592 | } |
1535 | 1593 | ||
1536 | if (!sbi->s_jquota_fmt) { | 1594 | if (!sbi->s_jquota_fmt) { |
1537 | printk(KERN_ERR "EXT4-fs: journaled quota format " | 1595 | ext4_msg(sb, KERN_ERR, "journaled quota format " |
1538 | "not specified.\n"); | 1596 | "not specified"); |
1539 | return 0; | 1597 | return 0; |
1540 | } | 1598 | } |
1541 | } else { | 1599 | } else { |
1542 | if (sbi->s_jquota_fmt) { | 1600 | if (sbi->s_jquota_fmt) { |
1543 | printk(KERN_ERR "EXT4-fs: journaled quota format " | 1601 | ext4_msg(sb, KERN_ERR, "journaled quota format " |
1544 | "specified with no journaling " | 1602 | "specified with no journaling " |
1545 | "enabled.\n"); | 1603 | "enabled"); |
1546 | return 0; | 1604 | return 0; |
1547 | } | 1605 | } |
1548 | } | 1606 | } |
@@ -1557,32 +1615,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1557 | int res = 0; | 1615 | int res = 0; |
1558 | 1616 | ||
1559 | if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { | 1617 | if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { |
1560 | printk(KERN_ERR "EXT4-fs warning: revision level too high, " | 1618 | ext4_msg(sb, KERN_ERR, "revision level too high, " |
1561 | "forcing read-only mode\n"); | 1619 | "forcing read-only mode"); |
1562 | res = MS_RDONLY; | 1620 | res = MS_RDONLY; |
1563 | } | 1621 | } |
1564 | if (read_only) | 1622 | if (read_only) |
1565 | return res; | 1623 | return res; |
1566 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) | 1624 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) |
1567 | printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " | 1625 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " |
1568 | "running e2fsck is recommended\n"); | 1626 | "running e2fsck is recommended"); |
1569 | else if ((sbi->s_mount_state & EXT4_ERROR_FS)) | 1627 | else if ((sbi->s_mount_state & EXT4_ERROR_FS)) |
1570 | printk(KERN_WARNING | 1628 | ext4_msg(sb, KERN_WARNING, |
1571 | "EXT4-fs warning: mounting fs with errors, " | 1629 | "warning: mounting fs with errors, " |
1572 | "running e2fsck is recommended\n"); | 1630 | "running e2fsck is recommended"); |
1573 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && | 1631 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && |
1574 | le16_to_cpu(es->s_mnt_count) >= | 1632 | le16_to_cpu(es->s_mnt_count) >= |
1575 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) | 1633 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) |
1576 | printk(KERN_WARNING | 1634 | ext4_msg(sb, KERN_WARNING, |
1577 | "EXT4-fs warning: maximal mount count reached, " | 1635 | "warning: maximal mount count reached, " |
1578 | "running e2fsck is recommended\n"); | 1636 | "running e2fsck is recommended"); |
1579 | else if (le32_to_cpu(es->s_checkinterval) && | 1637 | else if (le32_to_cpu(es->s_checkinterval) && |
1580 | (le32_to_cpu(es->s_lastcheck) + | 1638 | (le32_to_cpu(es->s_lastcheck) + |
1581 | le32_to_cpu(es->s_checkinterval) <= get_seconds())) | 1639 | le32_to_cpu(es->s_checkinterval) <= get_seconds())) |
1582 | printk(KERN_WARNING | 1640 | ext4_msg(sb, KERN_WARNING, |
1583 | "EXT4-fs warning: checktime reached, " | 1641 | "warning: checktime reached, " |
1584 | "running e2fsck is recommended\n"); | 1642 | "running e2fsck is recommended"); |
1585 | if (!sbi->s_journal) | 1643 | if (!sbi->s_journal) |
1586 | es->s_state &= cpu_to_le16(~EXT4_VALID_FS); | 1644 | es->s_state &= cpu_to_le16(~EXT4_VALID_FS); |
1587 | if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) | 1645 | if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) |
1588 | es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); | 1646 | es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); |
@@ -1592,7 +1650,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1592 | if (sbi->s_journal) | 1650 | if (sbi->s_journal) |
1593 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 1651 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
1594 | 1652 | ||
1595 | ext4_commit_super(sb, es, 1); | 1653 | ext4_commit_super(sb, 1); |
1596 | if (test_opt(sb, DEBUG)) | 1654 | if (test_opt(sb, DEBUG)) |
1597 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " | 1655 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " |
1598 | "bpg=%lu, ipg=%lu, mo=%04lx]\n", | 1656 | "bpg=%lu, ipg=%lu, mo=%04lx]\n", |
@@ -1603,11 +1661,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1603 | sbi->s_mount_opt); | 1661 | sbi->s_mount_opt); |
1604 | 1662 | ||
1605 | if (EXT4_SB(sb)->s_journal) { | 1663 | if (EXT4_SB(sb)->s_journal) { |
1606 | printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", | 1664 | ext4_msg(sb, KERN_INFO, "%s journal on %s", |
1607 | sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : | 1665 | EXT4_SB(sb)->s_journal->j_inode ? "internal" : |
1608 | "external", EXT4_SB(sb)->s_journal->j_devname); | 1666 | "external", EXT4_SB(sb)->s_journal->j_devname); |
1609 | } else { | 1667 | } else { |
1610 | printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); | 1668 | ext4_msg(sb, KERN_INFO, "no journal"); |
1611 | } | 1669 | } |
1612 | return res; | 1670 | return res; |
1613 | } | 1671 | } |
@@ -1616,10 +1674,10 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1616 | { | 1674 | { |
1617 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1675 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1618 | struct ext4_group_desc *gdp = NULL; | 1676 | struct ext4_group_desc *gdp = NULL; |
1619 | struct buffer_head *bh; | ||
1620 | ext4_group_t flex_group_count; | 1677 | ext4_group_t flex_group_count; |
1621 | ext4_group_t flex_group; | 1678 | ext4_group_t flex_group; |
1622 | int groups_per_flex = 0; | 1679 | int groups_per_flex = 0; |
1680 | size_t size; | ||
1623 | int i; | 1681 | int i; |
1624 | 1682 | ||
1625 | if (!sbi->s_es->s_log_groups_per_flex) { | 1683 | if (!sbi->s_es->s_log_groups_per_flex) { |
@@ -1634,16 +1692,21 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1634 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + | 1692 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
1635 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << | 1693 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << |
1636 | EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; | 1694 | EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; |
1637 | sbi->s_flex_groups = kzalloc(flex_group_count * | 1695 | size = flex_group_count * sizeof(struct flex_groups); |
1638 | sizeof(struct flex_groups), GFP_KERNEL); | 1696 | sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); |
1697 | if (sbi->s_flex_groups == NULL) { | ||
1698 | sbi->s_flex_groups = vmalloc(size); | ||
1699 | if (sbi->s_flex_groups) | ||
1700 | memset(sbi->s_flex_groups, 0, size); | ||
1701 | } | ||
1639 | if (sbi->s_flex_groups == NULL) { | 1702 | if (sbi->s_flex_groups == NULL) { |
1640 | printk(KERN_ERR "EXT4-fs: not enough memory for " | 1703 | ext4_msg(sb, KERN_ERR, "not enough memory for " |
1641 | "%u flex groups\n", flex_group_count); | 1704 | "%u flex groups", flex_group_count); |
1642 | goto failed; | 1705 | goto failed; |
1643 | } | 1706 | } |
1644 | 1707 | ||
1645 | for (i = 0; i < sbi->s_groups_count; i++) { | 1708 | for (i = 0; i < sbi->s_groups_count; i++) { |
1646 | gdp = ext4_get_group_desc(sb, i, &bh); | 1709 | gdp = ext4_get_group_desc(sb, i, NULL); |
1647 | 1710 | ||
1648 | flex_group = ext4_flex_group(sbi, i); | 1711 | flex_group = ext4_flex_group(sbi, i); |
1649 | atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, | 1712 | atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, |
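The flex-group array can be large on big filesystems (one struct flex_groups per flex group), so the hunk above tries a physically contiguous kzalloc first and falls back to vmalloc. The matching teardown must then check which allocator produced the pointer; a sketch of that pattern (this free-side helper is illustrative, not part of this hunk):

    static void free_flex_groups(struct flex_groups *fg)
    {
            if (!fg)
                    return;
            if (is_vmalloc_addr(fg))        /* came from the vmalloc fallback */
                    vfree(fg);
            else
                    kfree(fg);
    }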
@@ -1724,44 +1787,44 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1724 | 1787 | ||
1725 | block_bitmap = ext4_block_bitmap(sb, gdp); | 1788 | block_bitmap = ext4_block_bitmap(sb, gdp); |
1726 | if (block_bitmap < first_block || block_bitmap > last_block) { | 1789 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1727 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1790 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
1728 | "Block bitmap for group %u not in group " | 1791 | "Block bitmap for group %u not in group " |
1729 | "(block %llu)!\n", i, block_bitmap); | 1792 | "(block %llu)!", i, block_bitmap); |
1730 | return 0; | 1793 | return 0; |
1731 | } | 1794 | } |
1732 | inode_bitmap = ext4_inode_bitmap(sb, gdp); | 1795 | inode_bitmap = ext4_inode_bitmap(sb, gdp); |
1733 | if (inode_bitmap < first_block || inode_bitmap > last_block) { | 1796 | if (inode_bitmap < first_block || inode_bitmap > last_block) { |
1734 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1797 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
1735 | "Inode bitmap for group %u not in group " | 1798 | "Inode bitmap for group %u not in group " |
1736 | "(block %llu)!\n", i, inode_bitmap); | 1799 | "(block %llu)!", i, inode_bitmap); |
1737 | return 0; | 1800 | return 0; |
1738 | } | 1801 | } |
1739 | inode_table = ext4_inode_table(sb, gdp); | 1802 | inode_table = ext4_inode_table(sb, gdp); |
1740 | if (inode_table < first_block || | 1803 | if (inode_table < first_block || |
1741 | inode_table + sbi->s_itb_per_group - 1 > last_block) { | 1804 | inode_table + sbi->s_itb_per_group - 1 > last_block) { |
1742 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1805 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
1743 | "Inode table for group %u not in group " | 1806 | "Inode table for group %u not in group " |
1744 | "(block %llu)!\n", i, inode_table); | 1807 | "(block %llu)!", i, inode_table); |
1745 | return 0; | 1808 | return 0; |
1746 | } | 1809 | } |
1747 | spin_lock(sb_bgl_lock(sbi, i)); | 1810 | ext4_lock_group(sb, i); |
1748 | if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { | 1811 | if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { |
1749 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1812 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
1750 | "Checksum for group %u failed (%u!=%u)\n", | 1813 | "Checksum for group %u failed (%u!=%u)", |
1751 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 1814 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
1752 | gdp)), le16_to_cpu(gdp->bg_checksum)); | 1815 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
1753 | if (!(sb->s_flags & MS_RDONLY)) { | 1816 | if (!(sb->s_flags & MS_RDONLY)) { |
1754 | spin_unlock(sb_bgl_lock(sbi, i)); | 1817 | ext4_unlock_group(sb, i); |
1755 | return 0; | 1818 | return 0; |
1756 | } | 1819 | } |
1757 | } | 1820 | } |
1758 | spin_unlock(sb_bgl_lock(sbi, i)); | 1821 | ext4_unlock_group(sb, i); |
1759 | if (!flexbg_flag) | 1822 | if (!flexbg_flag) |
1760 | first_block += EXT4_BLOCKS_PER_GROUP(sb); | 1823 | first_block += EXT4_BLOCKS_PER_GROUP(sb); |
1761 | } | 1824 | } |
1762 | 1825 | ||
1763 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); | 1826 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); |
1764 | sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); | 1827 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); |
1765 | return 1; | 1828 | return 1; |
1766 | } | 1829 | } |
1767 | 1830 | ||
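The direct spin_lock(sb_bgl_lock(sbi, i)) calls are replaced by ext4_lock_group()/ext4_unlock_group() wrappers defined outside this hunk. A sketch of what such wrappers presumably look like at this point in the series, assuming they still map onto the same per-blockgroup spinlock array:

    static inline void ext4_lock_group(struct super_block *sb,
                                       ext4_group_t group)
    {
            spin_lock(sb_bgl_lock(EXT4_SB(sb), group));
    }

    static inline void ext4_unlock_group(struct super_block *sb,
                                         ext4_group_t group)
    {
            spin_unlock(sb_bgl_lock(EXT4_SB(sb), group));
    }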
@@ -1796,8 +1859,8 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1796 | } | 1859 | } |
1797 | 1860 | ||
1798 | if (bdev_read_only(sb->s_bdev)) { | 1861 | if (bdev_read_only(sb->s_bdev)) { |
1799 | printk(KERN_ERR "EXT4-fs: write access " | 1862 | ext4_msg(sb, KERN_ERR, "write access " |
1800 | "unavailable, skipping orphan cleanup.\n"); | 1863 | "unavailable, skipping orphan cleanup"); |
1801 | return; | 1864 | return; |
1802 | } | 1865 | } |
1803 | 1866 | ||
@@ -1811,8 +1874,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1811 | } | 1874 | } |
1812 | 1875 | ||
1813 | if (s_flags & MS_RDONLY) { | 1876 | if (s_flags & MS_RDONLY) { |
1814 | printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", | 1877 | ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); |
1815 | sb->s_id); | ||
1816 | sb->s_flags &= ~MS_RDONLY; | 1878 | sb->s_flags &= ~MS_RDONLY; |
1817 | } | 1879 | } |
1818 | #ifdef CONFIG_QUOTA | 1880 | #ifdef CONFIG_QUOTA |
@@ -1823,9 +1885,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1823 | if (EXT4_SB(sb)->s_qf_names[i]) { | 1885 | if (EXT4_SB(sb)->s_qf_names[i]) { |
1824 | int ret = ext4_quota_on_mount(sb, i); | 1886 | int ret = ext4_quota_on_mount(sb, i); |
1825 | if (ret < 0) | 1887 | if (ret < 0) |
1826 | printk(KERN_ERR | 1888 | ext4_msg(sb, KERN_ERR, |
1827 | "EXT4-fs: Cannot turn on journaled " | 1889 | "Cannot turn on journaled " |
1828 | "quota: error %d\n", ret); | 1890 | "quota: error %d", ret); |
1829 | } | 1891 | } |
1830 | } | 1892 | } |
1831 | #endif | 1893 | #endif |
@@ -1842,16 +1904,16 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1842 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 1904 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
1843 | vfs_dq_init(inode); | 1905 | vfs_dq_init(inode); |
1844 | if (inode->i_nlink) { | 1906 | if (inode->i_nlink) { |
1845 | printk(KERN_DEBUG | 1907 | ext4_msg(sb, KERN_DEBUG, |
1846 | "%s: truncating inode %lu to %lld bytes\n", | 1908 | "%s: truncating inode %lu to %lld bytes", |
1847 | __func__, inode->i_ino, inode->i_size); | 1909 | __func__, inode->i_ino, inode->i_size); |
1848 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", | 1910 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
1849 | inode->i_ino, inode->i_size); | 1911 | inode->i_ino, inode->i_size); |
1850 | ext4_truncate(inode); | 1912 | ext4_truncate(inode); |
1851 | nr_truncates++; | 1913 | nr_truncates++; |
1852 | } else { | 1914 | } else { |
1853 | printk(KERN_DEBUG | 1915 | ext4_msg(sb, KERN_DEBUG, |
1854 | "%s: deleting unreferenced inode %lu\n", | 1916 | "%s: deleting unreferenced inode %lu", |
1855 | __func__, inode->i_ino); | 1917 | __func__, inode->i_ino); |
1856 | jbd_debug(2, "deleting unreferenced inode %lu\n", | 1918 | jbd_debug(2, "deleting unreferenced inode %lu\n", |
1857 | inode->i_ino); | 1919 | inode->i_ino); |
@@ -1863,11 +1925,11 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1863 | #define PLURAL(x) (x), ((x) == 1) ? "" : "s" | 1925 | #define PLURAL(x) (x), ((x) == 1) ? "" : "s" |
1864 | 1926 | ||
1865 | if (nr_orphans) | 1927 | if (nr_orphans) |
1866 | printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", | 1928 | ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", |
1867 | sb->s_id, PLURAL(nr_orphans)); | 1929 | PLURAL(nr_orphans)); |
1868 | if (nr_truncates) | 1930 | if (nr_truncates) |
1869 | printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", | 1931 | ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", |
1870 | sb->s_id, PLURAL(nr_truncates)); | 1932 | PLURAL(nr_truncates)); |
1871 | #ifdef CONFIG_QUOTA | 1933 | #ifdef CONFIG_QUOTA |
1872 | /* Turn quotas off */ | 1934 | /* Turn quotas off */ |
1873 | for (i = 0; i < MAXQUOTAS; i++) { | 1935 | for (i = 0; i < MAXQUOTAS; i++) { |
@@ -1877,6 +1939,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1877 | #endif | 1939 | #endif |
1878 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ | 1940 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ |
1879 | } | 1941 | } |
1942 | |||
1880 | /* | 1943 | /* |
1881 | * Maximal extent format file size. | 1944 | * Maximal extent format file size. |
1882 | * Resulting logical blkno at s_maxbytes must fit in our on-disk | 1945 | * Resulting logical blkno at s_maxbytes must fit in our on-disk |
@@ -1927,19 +1990,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) | |||
1927 | loff_t res = EXT4_NDIR_BLOCKS; | 1990 | loff_t res = EXT4_NDIR_BLOCKS; |
1928 | int meta_blocks; | 1991 | int meta_blocks; |
1929 | loff_t upper_limit; | 1992 | loff_t upper_limit; |
1930 | /* This is calculated to be the largest file size for a | 1993 | /* This is calculated to be the largest file size for a dense, block |
1931 | * dense, bitmapped file such that the total number of | 1994 | * mapped file such that the file's total number of 512-byte sectors, |
1932 | * sectors in the file, including data and all indirect blocks, | 1995 | * including data and all indirect blocks, does not exceed (2^48 - 1). |
1933 | * does not exceed 2^48 -1 | 1996 | * |
1934 | * __u32 i_blocks_lo and _u16 i_blocks_high representing the | 1997 | * __u32 i_blocks_lo and __u16 i_blocks_high represent the total
1935 | * total number of 512 bytes blocks of the file | 1998 | * number of 512-byte sectors of the file. |
1936 | */ | 1999 | */ |
1937 | 2000 | ||
1938 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { | 2001 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
1939 | /* | 2002 | /* |
1940 | * !has_huge_files or CONFIG_LBD is not enabled | 2003 | * !has_huge_files or CONFIG_LBD not enabled implies that |
1941 | * implies the inode i_block represent total blocks in | 2004 | * the inode's i_blocks field can count at most 2^32
1942 | * 512 bytes 32 == size of vfs inode i_blocks * 8 | 2005 | * 512-byte sectors (32 == size of vfs inode i_blocks * 8)
1943 | */ | 2006 | */ |
1944 | upper_limit = (1LL << 32) - 1; | 2007 | upper_limit = (1LL << 32) - 1; |
1945 | 2008 | ||
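The rewritten comment pins the limit to the on-disk i_blocks width: 32 bits in i_blocks_lo plus 16 in i_blocks_high give a 48-bit count of 512-byte sectors. In numbers (illustrative arithmetic only):

    unsigned long long max_sectors = (1ULL << 48) - 1;   /* i_blocks limit */
    unsigned long long max_bytes   = max_sectors << 9;   /* x512 ~ 128 PiB */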
@@ -1981,7 +2044,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) | |||
1981 | } | 2044 | } |
1982 | 2045 | ||
1983 | static ext4_fsblk_t descriptor_loc(struct super_block *sb, | 2046 | static ext4_fsblk_t descriptor_loc(struct super_block *sb, |
1984 | ext4_fsblk_t logical_sb_block, int nr) | 2047 | ext4_fsblk_t logical_sb_block, int nr) |
1985 | { | 2048 | { |
1986 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2049 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1987 | ext4_group_t bg, first_meta_bg; | 2050 | ext4_group_t bg, first_meta_bg; |
@@ -1995,6 +2058,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, | |||
1995 | bg = sbi->s_desc_per_block * nr; | 2058 | bg = sbi->s_desc_per_block * nr; |
1996 | if (ext4_bg_has_super(sb, bg)) | 2059 | if (ext4_bg_has_super(sb, bg)) |
1997 | has_super = 1; | 2060 | has_super = 1; |
2061 | |||
1998 | return (has_super + ext4_group_first_block_no(sb, bg)); | 2062 | return (has_super + ext4_group_first_block_no(sb, bg)); |
1999 | } | 2063 | } |
2000 | 2064 | ||
@@ -2091,8 +2155,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2091 | if (parse_strtoul(buf, 0x40000000, &t)) | 2155 | if (parse_strtoul(buf, 0x40000000, &t)) |
2092 | return -EINVAL; | 2156 | return -EINVAL; |
2093 | 2157 | ||
2094 | /* inode_readahead_blks must be a power of 2 */ | 2158 | if (!is_power_of_2(t)) |
2095 | if (t & (t-1)) | ||
2096 | return -EINVAL; | 2159 | return -EINVAL; |
2097 | 2160 | ||
2098 | sbi->s_inode_readahead_blks = t; | 2161 | sbi->s_inode_readahead_blks = t; |
@@ -2100,7 +2163,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2100 | } | 2163 | } |
2101 | 2164 | ||
2102 | static ssize_t sbi_ui_show(struct ext4_attr *a, | 2165 | static ssize_t sbi_ui_show(struct ext4_attr *a, |
2103 | struct ext4_sb_info *sbi, char *buf) | 2166 | struct ext4_sb_info *sbi, char *buf) |
2104 | { | 2167 | { |
2105 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2168 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); |
2106 | 2169 | ||
@@ -2205,7 +2268,6 @@ static struct kobj_type ext4_ktype = { | |||
2205 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2268 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2206 | __releases(kernel_lock) | 2269 | __releases(kernel_lock) |
2207 | __acquires(kernel_lock) | 2270 | __acquires(kernel_lock) |
2208 | |||
2209 | { | 2271 | { |
2210 | struct buffer_head *bh; | 2272 | struct buffer_head *bh; |
2211 | struct ext4_super_block *es = NULL; | 2273 | struct ext4_super_block *es = NULL; |
@@ -2256,7 +2318,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2256 | 2318 | ||
2257 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 2319 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
2258 | if (!blocksize) { | 2320 | if (!blocksize) { |
2259 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); | 2321 | ext4_msg(sb, KERN_ERR, "unable to set blocksize"); |
2260 | goto out_fail; | 2322 | goto out_fail; |
2261 | } | 2323 | } |
2262 | 2324 | ||
@@ -2272,7 +2334,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2272 | } | 2334 | } |
2273 | 2335 | ||
2274 | if (!(bh = sb_bread(sb, logical_sb_block))) { | 2336 | if (!(bh = sb_bread(sb, logical_sb_block))) { |
2275 | printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); | 2337 | ext4_msg(sb, KERN_ERR, "unable to read superblock"); |
2276 | goto out_fail; | 2338 | goto out_fail; |
2277 | } | 2339 | } |
2278 | /* | 2340 | /* |
@@ -2321,6 +2383,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2321 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; | 2383 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; |
2322 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; | 2384 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; |
2323 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; | 2385 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; |
2386 | sbi->s_mb_history_max = default_mb_history_length; | ||
2324 | 2387 | ||
2325 | set_opt(sbi->s_mount_opt, BARRIER); | 2388 | set_opt(sbi->s_mount_opt, BARRIER); |
2326 | 2389 | ||
@@ -2330,7 +2393,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2330 | */ | 2393 | */ |
2331 | set_opt(sbi->s_mount_opt, DELALLOC); | 2394 | set_opt(sbi->s_mount_opt, DELALLOC); |
2332 | 2395 | ||
2333 | |||
2334 | if (!parse_options((char *) data, sb, &journal_devnum, | 2396 | if (!parse_options((char *) data, sb, &journal_devnum, |
2335 | &journal_ioprio, NULL, 0)) | 2397 | &journal_ioprio, NULL, 0)) |
2336 | goto failed_mount; | 2398 | goto failed_mount; |
@@ -2342,9 +2404,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2342 | (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || | 2404 | (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || |
2343 | EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || | 2405 | EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || |
2344 | EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) | 2406 | EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) |
2345 | printk(KERN_WARNING | 2407 | ext4_msg(sb, KERN_WARNING, |
2346 | "EXT4-fs warning: feature flags set on rev 0 fs, " | 2408 | "feature flags set on rev 0 fs, " |
2347 | "running e2fsck is recommended\n"); | 2409 | "running e2fsck is recommended"); |
2348 | 2410 | ||
2349 | /* | 2411 | /* |
2350 | * Check feature flags regardless of the revision level, since we | 2412 | * Check feature flags regardless of the revision level, since we |
@@ -2353,16 +2415,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2353 | */ | 2415 | */ |
2354 | features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); | 2416 | features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); |
2355 | if (features) { | 2417 | if (features) { |
2356 | printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " | 2418 | ext4_msg(sb, KERN_ERR, |
2357 | "unsupported optional features (%x).\n", sb->s_id, | 2419 | "Couldn't mount because of " |
2420 | "unsupported optional features (%x)", | ||
2358 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | 2421 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & |
2359 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | 2422 | ~EXT4_FEATURE_INCOMPAT_SUPP)); |
2360 | goto failed_mount; | 2423 | goto failed_mount; |
2361 | } | 2424 | } |
2362 | features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); | 2425 | features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); |
2363 | if (!(sb->s_flags & MS_RDONLY) && features) { | 2426 | if (!(sb->s_flags & MS_RDONLY) && features) { |
2364 | printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " | 2427 | ext4_msg(sb, KERN_ERR, |
2365 | "unsupported optional features (%x).\n", sb->s_id, | 2428 | "Couldn't mount RDWR because of " |
2429 | "unsupported optional features (%x)", | ||
2366 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | 2430 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & |
2367 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | 2431 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); |
2368 | goto failed_mount; | 2432 | goto failed_mount; |
@@ -2376,9 +2440,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2376 | */ | 2440 | */ |
2377 | if (sizeof(root->i_blocks) < sizeof(u64) && | 2441 | if (sizeof(root->i_blocks) < sizeof(u64) && |
2378 | !(sb->s_flags & MS_RDONLY)) { | 2442 | !(sb->s_flags & MS_RDONLY)) { |
2379 | printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " | 2443 | ext4_msg(sb, KERN_ERR, "Filesystem with huge " |
2380 | "files cannot be mounted read-write " | 2444 | "files cannot be mounted read-write " |
2381 | "without CONFIG_LBD.\n", sb->s_id); | 2445 | "without CONFIG_LBD"); |
2382 | goto failed_mount; | 2446 | goto failed_mount; |
2383 | } | 2447 | } |
2384 | } | 2448 | } |
@@ -2386,17 +2450,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2386 | 2450 | ||
2387 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 2451 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
2388 | blocksize > EXT4_MAX_BLOCK_SIZE) { | 2452 | blocksize > EXT4_MAX_BLOCK_SIZE) { |
2389 | printk(KERN_ERR | 2453 | ext4_msg(sb, KERN_ERR, |
2390 | "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", | 2454 | "Unsupported filesystem blocksize %d", blocksize); |
2391 | blocksize, sb->s_id); | ||
2392 | goto failed_mount; | 2455 | goto failed_mount; |
2393 | } | 2456 | } |
2394 | 2457 | ||
2395 | if (sb->s_blocksize != blocksize) { | 2458 | if (sb->s_blocksize != blocksize) { |
2396 | |||
2397 | /* Validate the filesystem blocksize */ | 2459 | /* Validate the filesystem blocksize */ |
2398 | if (!sb_set_blocksize(sb, blocksize)) { | 2460 | if (!sb_set_blocksize(sb, blocksize)) { |
2399 | printk(KERN_ERR "EXT4-fs: bad block size %d.\n", | 2461 | ext4_msg(sb, KERN_ERR, "bad block size %d", |
2400 | blocksize); | 2462 | blocksize); |
2401 | goto failed_mount; | 2463 | goto failed_mount; |
2402 | } | 2464 | } |
@@ -2406,15 +2468,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2406 | offset = do_div(logical_sb_block, blocksize); | 2468 | offset = do_div(logical_sb_block, blocksize); |
2407 | bh = sb_bread(sb, logical_sb_block); | 2469 | bh = sb_bread(sb, logical_sb_block); |
2408 | if (!bh) { | 2470 | if (!bh) { |
2409 | printk(KERN_ERR | 2471 | ext4_msg(sb, KERN_ERR, |
2410 | "EXT4-fs: Can't read superblock on 2nd try.\n"); | 2472 | "Can't read superblock on 2nd try"); |
2411 | goto failed_mount; | 2473 | goto failed_mount; |
2412 | } | 2474 | } |
2413 | es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); | 2475 | es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); |
2414 | sbi->s_es = es; | 2476 | sbi->s_es = es; |
2415 | if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { | 2477 | if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { |
2416 | printk(KERN_ERR | 2478 | ext4_msg(sb, KERN_ERR, |
2417 | "EXT4-fs: Magic mismatch, very weird !\n"); | 2479 | "Magic mismatch, very weird!"); |
2418 | goto failed_mount; | 2480 | goto failed_mount; |
2419 | } | 2481 | } |
2420 | } | 2482 | } |
@@ -2432,30 +2494,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2432 | if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || | 2494 | if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || |
2433 | (!is_power_of_2(sbi->s_inode_size)) || | 2495 | (!is_power_of_2(sbi->s_inode_size)) || |
2434 | (sbi->s_inode_size > blocksize)) { | 2496 | (sbi->s_inode_size > blocksize)) { |
2435 | printk(KERN_ERR | 2497 | ext4_msg(sb, KERN_ERR, |
2436 | "EXT4-fs: unsupported inode size: %d\n", | 2498 | "unsupported inode size: %d", |
2437 | sbi->s_inode_size); | 2499 | sbi->s_inode_size); |
2438 | goto failed_mount; | 2500 | goto failed_mount; |
2439 | } | 2501 | } |
2440 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) | 2502 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) |
2441 | sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); | 2503 | sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); |
2442 | } | 2504 | } |
2505 | |||
2443 | sbi->s_desc_size = le16_to_cpu(es->s_desc_size); | 2506 | sbi->s_desc_size = le16_to_cpu(es->s_desc_size); |
2444 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { | 2507 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { |
2445 | if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || | 2508 | if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || |
2446 | sbi->s_desc_size > EXT4_MAX_DESC_SIZE || | 2509 | sbi->s_desc_size > EXT4_MAX_DESC_SIZE || |
2447 | !is_power_of_2(sbi->s_desc_size)) { | 2510 | !is_power_of_2(sbi->s_desc_size)) { |
2448 | printk(KERN_ERR | 2511 | ext4_msg(sb, KERN_ERR, |
2449 | "EXT4-fs: unsupported descriptor size %lu\n", | 2512 | "unsupported descriptor size %lu", |
2450 | sbi->s_desc_size); | 2513 | sbi->s_desc_size); |
2451 | goto failed_mount; | 2514 | goto failed_mount; |
2452 | } | 2515 | } |
2453 | } else | 2516 | } else |
2454 | sbi->s_desc_size = EXT4_MIN_DESC_SIZE; | 2517 | sbi->s_desc_size = EXT4_MIN_DESC_SIZE; |
2518 | |||
2455 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); | 2519 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); |
2456 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); | 2520 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); |
2457 | if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) | 2521 | if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) |
2458 | goto cantfind_ext4; | 2522 | goto cantfind_ext4; |
2523 | |||
2459 | sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); | 2524 | sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); |
2460 | if (sbi->s_inodes_per_block == 0) | 2525 | if (sbi->s_inodes_per_block == 0) |
2461 | goto cantfind_ext4; | 2526 | goto cantfind_ext4; |
@@ -2466,6 +2531,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2466 | sbi->s_mount_state = le16_to_cpu(es->s_state); | 2531 | sbi->s_mount_state = le16_to_cpu(es->s_state); |
2467 | sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); | 2532 | sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); |
2468 | sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); | 2533 | sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); |
2534 | |||
2469 | for (i = 0; i < 4; i++) | 2535 | for (i = 0; i < 4; i++) |
2470 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); | 2536 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); |
2471 | sbi->s_def_hash_version = es->s_def_hash_version; | 2537 | sbi->s_def_hash_version = es->s_def_hash_version; |
@@ -2483,25 +2549,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2483 | } | 2549 | } |
2484 | 2550 | ||
2485 | if (sbi->s_blocks_per_group > blocksize * 8) { | 2551 | if (sbi->s_blocks_per_group > blocksize * 8) { |
2486 | printk(KERN_ERR | 2552 | ext4_msg(sb, KERN_ERR, |
2487 | "EXT4-fs: #blocks per group too big: %lu\n", | 2553 | "#blocks per group too big: %lu", |
2488 | sbi->s_blocks_per_group); | 2554 | sbi->s_blocks_per_group); |
2489 | goto failed_mount; | 2555 | goto failed_mount; |
2490 | } | 2556 | } |
2491 | if (sbi->s_inodes_per_group > blocksize * 8) { | 2557 | if (sbi->s_inodes_per_group > blocksize * 8) { |
2492 | printk(KERN_ERR | 2558 | ext4_msg(sb, KERN_ERR, |
2493 | "EXT4-fs: #inodes per group too big: %lu\n", | 2559 | "#inodes per group too big: %lu", |
2494 | sbi->s_inodes_per_group); | 2560 | sbi->s_inodes_per_group); |
2495 | goto failed_mount; | 2561 | goto failed_mount; |
2496 | } | 2562 | } |
2497 | 2563 | ||
2498 | if (ext4_blocks_count(es) > | 2564 | if (ext4_blocks_count(es) > |
2499 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 2565 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { |
2500 | printk(KERN_ERR "EXT4-fs: filesystem on %s:" | 2566 | ext4_msg(sb, KERN_ERR, "filesystem" |
2501 | " too large to mount safely\n", sb->s_id); | 2567 | " too large to mount safely"); |
2502 | if (sizeof(sector_t) < 8) | 2568 | if (sizeof(sector_t) < 8) |
2503 | printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " | 2569 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); |
2504 | "enabled\n"); | ||
2505 | goto failed_mount; | 2570 | goto failed_mount; |
2506 | } | 2571 | } |
2507 | 2572 | ||
@@ -2511,21 +2576,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2511 | /* check blocks count against device size */ | 2576 | /* check blocks count against device size */ |
2512 | blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; | 2577 | blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; |
2513 | if (blocks_count && ext4_blocks_count(es) > blocks_count) { | 2578 | if (blocks_count && ext4_blocks_count(es) > blocks_count) { |
2514 | printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " | 2579 | ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " |
2515 | "exceeds size of device (%llu blocks)\n", | 2580 | "exceeds size of device (%llu blocks)", |
2516 | ext4_blocks_count(es), blocks_count); | 2581 | ext4_blocks_count(es), blocks_count); |
2517 | goto failed_mount; | 2582 | goto failed_mount; |
2518 | } | 2583 | } |
2519 | 2584 | ||
2520 | /* | 2585 | /* |
2521 | * It makes no sense for the first data block to be beyond the end | 2586 | * It makes no sense for the first data block to be beyond the end |
2522 | * of the filesystem. | 2587 | * of the filesystem. |
2523 | */ | 2588 | */ |
2524 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { | 2589 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { |
2525 | printk(KERN_WARNING "EXT4-fs: bad geometry: first data" | 2590 | ext4_msg(sb, KERN_WARNING, "bad geometry: first data" |
2526 | "block %u is beyond end of filesystem (%llu)\n", | 2591 | "block %u is beyond end of filesystem (%llu)", |
2527 | le32_to_cpu(es->s_first_data_block), | 2592 | le32_to_cpu(es->s_first_data_block), |
2528 | ext4_blocks_count(es)); | 2593 | ext4_blocks_count(es)); |
2529 | goto failed_mount; | 2594 | goto failed_mount; |
2530 | } | 2595 | } |
2531 | blocks_count = (ext4_blocks_count(es) - | 2596 | blocks_count = (ext4_blocks_count(es) - |
@@ -2533,9 +2598,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2533 | EXT4_BLOCKS_PER_GROUP(sb) - 1); | 2598 | EXT4_BLOCKS_PER_GROUP(sb) - 1); |
2534 | do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); | 2599 | do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); |
2535 | if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { | 2600 | if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { |
2536 | printk(KERN_WARNING "EXT4-fs: groups count too large: %u " | 2601 | ext4_msg(sb, KERN_WARNING, "groups count too large: %u " |
2537 | "(block count %llu, first data block %u, " | 2602 | "(block count %llu, first data block %u, " |
2538 | "blocks per group %lu)\n", sbi->s_groups_count, | 2603 | "blocks per group %lu)", sbi->s_groups_count, |
2539 | ext4_blocks_count(es), | 2604 | ext4_blocks_count(es), |
2540 | le32_to_cpu(es->s_first_data_block), | 2605 | le32_to_cpu(es->s_first_data_block), |
2541 | EXT4_BLOCKS_PER_GROUP(sb)); | 2606 | EXT4_BLOCKS_PER_GROUP(sb)); |
@@ -2547,7 +2612,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2547 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), | 2612 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), |
2548 | GFP_KERNEL); | 2613 | GFP_KERNEL); |
2549 | if (sbi->s_group_desc == NULL) { | 2614 | if (sbi->s_group_desc == NULL) { |
2550 | printk(KERN_ERR "EXT4-fs: not enough memory\n"); | 2615 | ext4_msg(sb, KERN_ERR, "not enough memory"); |
2551 | goto failed_mount; | 2616 | goto failed_mount; |
2552 | } | 2617 | } |
2553 | 2618 | ||
@@ -2562,21 +2627,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2562 | block = descriptor_loc(sb, logical_sb_block, i); | 2627 | block = descriptor_loc(sb, logical_sb_block, i); |
2563 | sbi->s_group_desc[i] = sb_bread(sb, block); | 2628 | sbi->s_group_desc[i] = sb_bread(sb, block); |
2564 | if (!sbi->s_group_desc[i]) { | 2629 | if (!sbi->s_group_desc[i]) { |
2565 | printk(KERN_ERR "EXT4-fs: " | 2630 | ext4_msg(sb, KERN_ERR, |
2566 | "can't read group descriptor %d\n", i); | 2631 | "can't read group descriptor %d", i); |
2567 | db_count = i; | 2632 | db_count = i; |
2568 | goto failed_mount2; | 2633 | goto failed_mount2; |
2569 | } | 2634 | } |
2570 | } | 2635 | } |
2571 | if (!ext4_check_descriptors(sb)) { | 2636 | if (!ext4_check_descriptors(sb)) { |
2572 | printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); | 2637 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); |
2573 | goto failed_mount2; | 2638 | goto failed_mount2; |
2574 | } | 2639 | } |
2575 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 2640 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
2576 | if (!ext4_fill_flex_info(sb)) { | 2641 | if (!ext4_fill_flex_info(sb)) { |
2577 | printk(KERN_ERR | 2642 | ext4_msg(sb, KERN_ERR, |
2578 | "EXT4-fs: unable to initialize " | 2643 | "unable to initialize " |
2579 | "flex_bg meta info!\n"); | 2644 | "flex_bg meta info!"); |
2580 | goto failed_mount2; | 2645 | goto failed_mount2; |
2581 | } | 2646 | } |
2582 | 2647 | ||
@@ -2598,7 +2663,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2598 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | 2663 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); |
2599 | } | 2664 | } |
2600 | if (err) { | 2665 | if (err) { |
2601 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); | 2666 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
2602 | goto failed_mount3; | 2667 | goto failed_mount3; |
2603 | } | 2668 | } |
2604 | 2669 | ||
@@ -2607,7 +2672,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2607 | /* | 2672 | /* |
2608 | * set up enough so that it can read an inode | 2673 | * set up enough so that it can read an inode |
2609 | */ | 2674 | */ |
2610 | sb->s_op = &ext4_sops; | 2675 | if (!test_opt(sb, NOLOAD) && |
2676 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) | ||
2677 | sb->s_op = &ext4_sops; | ||
2678 | else | ||
2679 | sb->s_op = &ext4_nojournal_sops; | ||
2611 | sb->s_export_op = &ext4_export_ops; | 2680 | sb->s_export_op = &ext4_export_ops; |
2612 | sb->s_xattr = ext4_xattr_handlers; | 2681 | sb->s_xattr = ext4_xattr_handlers; |
2613 | #ifdef CONFIG_QUOTA | 2682 | #ifdef CONFIG_QUOTA |
@@ -2615,6 +2684,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2615 | sb->dq_op = &ext4_quota_operations; | 2684 | sb->dq_op = &ext4_quota_operations; |
2616 | #endif | 2685 | #endif |
2617 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ | 2686 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ |
2687 | mutex_init(&sbi->s_orphan_lock); | ||
2688 | mutex_init(&sbi->s_resize_lock); | ||
2618 | 2689 | ||
2619 | sb->s_root = NULL; | 2690 | sb->s_root = NULL; |
2620 | 2691 | ||
@@ -2632,13 +2703,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2632 | goto failed_mount3; | 2703 | goto failed_mount3; |
2633 | if (!(sb->s_flags & MS_RDONLY) && | 2704 | if (!(sb->s_flags & MS_RDONLY) && |
2634 | EXT4_SB(sb)->s_journal->j_failed_commit) { | 2705 | EXT4_SB(sb)->s_journal->j_failed_commit) { |
2635 | printk(KERN_CRIT "EXT4-fs error (device %s): " | 2706 | ext4_msg(sb, KERN_CRIT, "error: " |
2636 | "ext4_fill_super: Journal transaction " | 2707 | "ext4_fill_super: Journal transaction " |
2637 | "%u is corrupt\n", sb->s_id, | 2708 | "%u is corrupt", |
2638 | EXT4_SB(sb)->s_journal->j_failed_commit); | 2709 | EXT4_SB(sb)->s_journal->j_failed_commit); |
2639 | if (test_opt(sb, ERRORS_RO)) { | 2710 | if (test_opt(sb, ERRORS_RO)) { |
2640 | printk(KERN_CRIT | 2711 | ext4_msg(sb, KERN_CRIT, |
2641 | "Mounting filesystem read-only\n"); | 2712 | "Mounting filesystem read-only"); |
2642 | sb->s_flags |= MS_RDONLY; | 2713 | sb->s_flags |= MS_RDONLY; |
2643 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 2714 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
2644 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 2715 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
@@ -2646,14 +2717,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2646 | if (test_opt(sb, ERRORS_PANIC)) { | 2717 | if (test_opt(sb, ERRORS_PANIC)) { |
2647 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 2718 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
2648 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 2719 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
2649 | ext4_commit_super(sb, es, 1); | 2720 | ext4_commit_super(sb, 1); |
2650 | goto failed_mount4; | 2721 | goto failed_mount4; |
2651 | } | 2722 | } |
2652 | } | 2723 | } |
2653 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && | 2724 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && |
2654 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 2725 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
2655 | printk(KERN_ERR "EXT4-fs: required journal recovery " | 2726 | ext4_msg(sb, KERN_ERR, "required journal recovery " |
2656 | "suppressed and not mounted read-only\n"); | 2727 | "suppressed and not mounted read-only"); |
2657 | goto failed_mount4; | 2728 | goto failed_mount4; |
2658 | } else { | 2729 | } else { |
2659 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); | 2730 | clear_opt(sbi->s_mount_opt, DATA_FLAGS); |
@@ -2666,7 +2737,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2666 | if (ext4_blocks_count(es) > 0xffffffffULL && | 2737 | if (ext4_blocks_count(es) > 0xffffffffULL && |
2667 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, | 2738 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, |
2668 | JBD2_FEATURE_INCOMPAT_64BIT)) { | 2739 | JBD2_FEATURE_INCOMPAT_64BIT)) { |
2669 | printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); | 2740 | ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); |
2670 | goto failed_mount4; | 2741 | goto failed_mount4; |
2671 | } | 2742 | } |
2672 | 2743 | ||
@@ -2704,8 +2775,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2704 | case EXT4_MOUNT_WRITEBACK_DATA: | 2775 | case EXT4_MOUNT_WRITEBACK_DATA: |
2705 | if (!jbd2_journal_check_available_features | 2776 | if (!jbd2_journal_check_available_features |
2706 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { | 2777 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { |
2707 | printk(KERN_ERR "EXT4-fs: Journal does not support " | 2778 | ext4_msg(sb, KERN_ERR, "Journal does not support " |
2708 | "requested data journaling mode\n"); | 2779 | "requested data journaling mode"); |
2709 | goto failed_mount4; | 2780 | goto failed_mount4; |
2710 | } | 2781 | } |
2711 | default: | 2782 | default: |
@@ -2717,8 +2788,8 @@ no_journal: | |||
2717 | 2788 | ||
2718 | if (test_opt(sb, NOBH)) { | 2789 | if (test_opt(sb, NOBH)) { |
2719 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { | 2790 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { |
2720 | printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " | 2791 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " |
2721 | "its supported only with writeback mode\n"); | 2792 | "its supported only with writeback mode"); |
2722 | clear_opt(sbi->s_mount_opt, NOBH); | 2793 | clear_opt(sbi->s_mount_opt, NOBH); |
2723 | } | 2794 | } |
2724 | } | 2795 | } |
@@ -2729,18 +2800,18 @@ no_journal: | |||
2729 | 2800 | ||
2730 | root = ext4_iget(sb, EXT4_ROOT_INO); | 2801 | root = ext4_iget(sb, EXT4_ROOT_INO); |
2731 | if (IS_ERR(root)) { | 2802 | if (IS_ERR(root)) { |
2732 | printk(KERN_ERR "EXT4-fs: get root inode failed\n"); | 2803 | ext4_msg(sb, KERN_ERR, "get root inode failed"); |
2733 | ret = PTR_ERR(root); | 2804 | ret = PTR_ERR(root); |
2734 | goto failed_mount4; | 2805 | goto failed_mount4; |
2735 | } | 2806 | } |
2736 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 2807 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
2737 | iput(root); | 2808 | iput(root); |
2738 | printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); | 2809 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); |
2739 | goto failed_mount4; | 2810 | goto failed_mount4; |
2740 | } | 2811 | } |
2741 | sb->s_root = d_alloc_root(root); | 2812 | sb->s_root = d_alloc_root(root); |
2742 | if (!sb->s_root) { | 2813 | if (!sb->s_root) { |
2743 | printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); | 2814 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); |
2744 | iput(root); | 2815 | iput(root); |
2745 | ret = -ENOMEM; | 2816 | ret = -ENOMEM; |
2746 | goto failed_mount4; | 2817 | goto failed_mount4; |
@@ -2769,22 +2840,29 @@ no_journal: | |||
2769 | sbi->s_inode_size) { | 2840 | sbi->s_inode_size) { |
2770 | sbi->s_want_extra_isize = sizeof(struct ext4_inode) - | 2841 | sbi->s_want_extra_isize = sizeof(struct ext4_inode) - |
2771 | EXT4_GOOD_OLD_INODE_SIZE; | 2842 | EXT4_GOOD_OLD_INODE_SIZE; |
2772 | printk(KERN_INFO "EXT4-fs: required extra inode space not" | 2843 | ext4_msg(sb, KERN_INFO, "required extra inode space not" |
2773 | "available.\n"); | 2844 | "available"); |
2774 | } | 2845 | } |
2775 | 2846 | ||
2776 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 2847 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
2777 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | 2848 | ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " |
2778 | "requested data journaling mode\n"); | 2849 | "requested data journaling mode"); |
2779 | clear_opt(sbi->s_mount_opt, DELALLOC); | 2850 | clear_opt(sbi->s_mount_opt, DELALLOC); |
2780 | } else if (test_opt(sb, DELALLOC)) | 2851 | } else if (test_opt(sb, DELALLOC)) |
2781 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | 2852 | ext4_msg(sb, KERN_INFO, "delayed allocation enabled"); |
2853 | |||
2854 | err = ext4_setup_system_zone(sb); | ||
2855 | if (err) { | ||
2856 | ext4_msg(sb, KERN_ERR, "failed to initialize system " | ||
2857 | "zone (%d)\n", err); | ||
2858 | goto failed_mount4; | ||
2859 | } | ||
2782 | 2860 | ||
2783 | ext4_ext_init(sb); | 2861 | ext4_ext_init(sb); |
2784 | err = ext4_mb_init(sb, needs_recovery); | 2862 | err = ext4_mb_init(sb, needs_recovery); |
2785 | if (err) { | 2863 | if (err) { |
2786 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | 2864 | ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", |
2787 | err); | 2865 | err); |
2788 | goto failed_mount4; | 2866 | goto failed_mount4; |
2789 | } | 2867 | } |
2790 | 2868 | ||
@@ -2798,19 +2876,11 @@ no_journal: | |||
2798 | goto failed_mount4; | 2876 | goto failed_mount4; |
2799 | }; | 2877 | }; |
2800 | 2878 | ||
2801 | /* | ||
2802 | * akpm: core read_super() calls in here with the superblock locked. | ||
2803 | * That deadlocks, because orphan cleanup needs to lock the superblock | ||
2804 | * in numerous places. Here we just pop the lock - it's relatively | ||
2805 | * harmless, because we are now ready to accept write_super() requests, | ||
2806 | * and aviro says that's the only reason for hanging onto the | ||
2807 | * superblock lock. | ||
2808 | */ | ||
2809 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; | 2879 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; |
2810 | ext4_orphan_cleanup(sb, es); | 2880 | ext4_orphan_cleanup(sb, es); |
2811 | EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; | 2881 | EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; |
2812 | if (needs_recovery) { | 2882 | if (needs_recovery) { |
2813 | printk(KERN_INFO "EXT4-fs: recovery complete.\n"); | 2883 | ext4_msg(sb, KERN_INFO, "recovery complete"); |
2814 | ext4_mark_recovery_complete(sb, es); | 2884 | ext4_mark_recovery_complete(sb, es); |
2815 | } | 2885 | } |
2816 | if (EXT4_SB(sb)->s_journal) { | 2886 | if (EXT4_SB(sb)->s_journal) { |
@@ -2823,25 +2893,30 @@ no_journal: | |||
2823 | } else | 2893 | } else |
2824 | descr = "out journal"; | 2894 | descr = "out journal"; |
2825 | 2895 | ||
2826 | printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", | 2896 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); |
2827 | sb->s_id, descr); | ||
2828 | 2897 | ||
2829 | lock_kernel(); | 2898 | lock_kernel(); |
2830 | return 0; | 2899 | return 0; |
2831 | 2900 | ||
2832 | cantfind_ext4: | 2901 | cantfind_ext4: |
2833 | if (!silent) | 2902 | if (!silent) |
2834 | printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", | 2903 | ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); |
2835 | sb->s_id); | ||
2836 | goto failed_mount; | 2904 | goto failed_mount; |
2837 | 2905 | ||
2838 | failed_mount4: | 2906 | failed_mount4: |
2839 | printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); | 2907 | ext4_msg(sb, KERN_ERR, "mount failed"); |
2908 | ext4_release_system_zone(sb); | ||
2840 | if (sbi->s_journal) { | 2909 | if (sbi->s_journal) { |
2841 | jbd2_journal_destroy(sbi->s_journal); | 2910 | jbd2_journal_destroy(sbi->s_journal); |
2842 | sbi->s_journal = NULL; | 2911 | sbi->s_journal = NULL; |
2843 | } | 2912 | } |
2844 | failed_mount3: | 2913 | failed_mount3: |
2914 | if (sbi->s_flex_groups) { | ||
2915 | if (is_vmalloc_addr(sbi->s_flex_groups)) | ||
2916 | vfree(sbi->s_flex_groups); | ||
2917 | else | ||
2918 | kfree(sbi->s_flex_groups); | ||
2919 | } | ||
2845 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2920 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
2846 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2921 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
2847 | percpu_counter_destroy(&sbi->s_dirs_counter); | 2922 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -2862,6 +2937,7 @@ failed_mount: | |||
2862 | brelse(bh); | 2937 | brelse(bh); |
2863 | out_fail: | 2938 | out_fail: |
2864 | sb->s_fs_info = NULL; | 2939 | sb->s_fs_info = NULL; |
2940 | kfree(sbi->s_blockgroup_lock); | ||
2865 | kfree(sbi); | 2941 | kfree(sbi); |
2866 | lock_kernel(); | 2942 | lock_kernel(); |
2867 | return ret; | 2943 | return ret; |
@@ -2906,27 +2982,27 @@ static journal_t *ext4_get_journal(struct super_block *sb, | |||
2906 | 2982 | ||
2907 | journal_inode = ext4_iget(sb, journal_inum); | 2983 | journal_inode = ext4_iget(sb, journal_inum); |
2908 | if (IS_ERR(journal_inode)) { | 2984 | if (IS_ERR(journal_inode)) { |
2909 | printk(KERN_ERR "EXT4-fs: no journal found.\n"); | 2985 | ext4_msg(sb, KERN_ERR, "no journal found"); |
2910 | return NULL; | 2986 | return NULL; |
2911 | } | 2987 | } |
2912 | if (!journal_inode->i_nlink) { | 2988 | if (!journal_inode->i_nlink) { |
2913 | make_bad_inode(journal_inode); | 2989 | make_bad_inode(journal_inode); |
2914 | iput(journal_inode); | 2990 | iput(journal_inode); |
2915 | printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); | 2991 | ext4_msg(sb, KERN_ERR, "journal inode is deleted"); |
2916 | return NULL; | 2992 | return NULL; |
2917 | } | 2993 | } |
2918 | 2994 | ||
2919 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", | 2995 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", |
2920 | journal_inode, journal_inode->i_size); | 2996 | journal_inode, journal_inode->i_size); |
2921 | if (!S_ISREG(journal_inode->i_mode)) { | 2997 | if (!S_ISREG(journal_inode->i_mode)) { |
2922 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); | 2998 | ext4_msg(sb, KERN_ERR, "invalid journal inode"); |
2923 | iput(journal_inode); | 2999 | iput(journal_inode); |
2924 | return NULL; | 3000 | return NULL; |
2925 | } | 3001 | } |
2926 | 3002 | ||
2927 | journal = jbd2_journal_init_inode(journal_inode); | 3003 | journal = jbd2_journal_init_inode(journal_inode); |
2928 | if (!journal) { | 3004 | if (!journal) { |
2929 | printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); | 3005 | ext4_msg(sb, KERN_ERR, "Could not load journal inode"); |
2930 | iput(journal_inode); | 3006 | iput(journal_inode); |
2931 | return NULL; | 3007 | return NULL; |
2932 | } | 3008 | } |
@@ -2950,22 +3026,22 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
2950 | 3026 | ||
2951 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); | 3027 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); |
2952 | 3028 | ||
2953 | bdev = ext4_blkdev_get(j_dev); | 3029 | bdev = ext4_blkdev_get(j_dev, sb); |
2954 | if (bdev == NULL) | 3030 | if (bdev == NULL) |
2955 | return NULL; | 3031 | return NULL; |
2956 | 3032 | ||
2957 | if (bd_claim(bdev, sb)) { | 3033 | if (bd_claim(bdev, sb)) { |
2958 | printk(KERN_ERR | 3034 | ext4_msg(sb, KERN_ERR, |
2959 | "EXT4-fs: failed to claim external journal device.\n"); | 3035 | "failed to claim external journal device"); |
2960 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); | 3036 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE); |
2961 | return NULL; | 3037 | return NULL; |
2962 | } | 3038 | } |
2963 | 3039 | ||
2964 | blocksize = sb->s_blocksize; | 3040 | blocksize = sb->s_blocksize; |
2965 | hblock = bdev_hardsect_size(bdev); | 3041 | hblock = bdev_logical_block_size(bdev); |
2966 | if (blocksize < hblock) { | 3042 | if (blocksize < hblock) { |
2967 | printk(KERN_ERR | 3043 | ext4_msg(sb, KERN_ERR, |
2968 | "EXT4-fs: blocksize too small for journal device.\n"); | 3044 | "blocksize too small for journal device"); |
2969 | goto out_bdev; | 3045 | goto out_bdev; |
2970 | } | 3046 | } |
2971 | 3047 | ||
@@ -2973,8 +3049,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
2973 | offset = EXT4_MIN_BLOCK_SIZE % blocksize; | 3049 | offset = EXT4_MIN_BLOCK_SIZE % blocksize; |
2974 | set_blocksize(bdev, blocksize); | 3050 | set_blocksize(bdev, blocksize); |
2975 | if (!(bh = __bread(bdev, sb_block, blocksize))) { | 3051 | if (!(bh = __bread(bdev, sb_block, blocksize))) { |
2976 | printk(KERN_ERR "EXT4-fs: couldn't read superblock of " | 3052 | ext4_msg(sb, KERN_ERR, "couldn't read superblock of " |
2977 | "external journal\n"); | 3053 | "external journal"); |
2978 | goto out_bdev; | 3054 | goto out_bdev; |
2979 | } | 3055 | } |
2980 | 3056 | ||
@@ -2982,14 +3058,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
2982 | if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || | 3058 | if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || |
2983 | !(le32_to_cpu(es->s_feature_incompat) & | 3059 | !(le32_to_cpu(es->s_feature_incompat) & |
2984 | EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { | 3060 | EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { |
2985 | printk(KERN_ERR "EXT4-fs: external journal has " | 3061 | ext4_msg(sb, KERN_ERR, "external journal has " |
2986 | "bad superblock\n"); | 3062 | "bad superblock"); |
2987 | brelse(bh); | 3063 | brelse(bh); |
2988 | goto out_bdev; | 3064 | goto out_bdev; |
2989 | } | 3065 | } |
2990 | 3066 | ||
2991 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { | 3067 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { |
2992 | printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); | 3068 | ext4_msg(sb, KERN_ERR, "journal UUID does not match"); |
2993 | brelse(bh); | 3069 | brelse(bh); |
2994 | goto out_bdev; | 3070 | goto out_bdev; |
2995 | } | 3071 | } |
@@ -3001,25 +3077,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
3001 | journal = jbd2_journal_init_dev(bdev, sb->s_bdev, | 3077 | journal = jbd2_journal_init_dev(bdev, sb->s_bdev, |
3002 | start, len, blocksize); | 3078 | start, len, blocksize); |
3003 | if (!journal) { | 3079 | if (!journal) { |
3004 | printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); | 3080 | ext4_msg(sb, KERN_ERR, "failed to create device journal"); |
3005 | goto out_bdev; | 3081 | goto out_bdev; |
3006 | } | 3082 | } |
3007 | journal->j_private = sb; | 3083 | journal->j_private = sb; |
3008 | ll_rw_block(READ, 1, &journal->j_sb_buffer); | 3084 | ll_rw_block(READ, 1, &journal->j_sb_buffer); |
3009 | wait_on_buffer(journal->j_sb_buffer); | 3085 | wait_on_buffer(journal->j_sb_buffer); |
3010 | if (!buffer_uptodate(journal->j_sb_buffer)) { | 3086 | if (!buffer_uptodate(journal->j_sb_buffer)) { |
3011 | printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); | 3087 | ext4_msg(sb, KERN_ERR, "I/O error on journal device"); |
3012 | goto out_journal; | 3088 | goto out_journal; |
3013 | } | 3089 | } |
3014 | if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { | 3090 | if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { |
3015 | printk(KERN_ERR "EXT4-fs: External journal has more than one " | 3091 | ext4_msg(sb, KERN_ERR, "External journal has more than one " |
3016 | "user (unsupported) - %d\n", | 3092 | "user (unsupported) - %d", |
3017 | be32_to_cpu(journal->j_superblock->s_nr_users)); | 3093 | be32_to_cpu(journal->j_superblock->s_nr_users)); |
3018 | goto out_journal; | 3094 | goto out_journal; |
3019 | } | 3095 | } |
3020 | EXT4_SB(sb)->journal_bdev = bdev; | 3096 | EXT4_SB(sb)->journal_bdev = bdev; |
3021 | ext4_init_journal_params(sb, journal); | 3097 | ext4_init_journal_params(sb, journal); |
3022 | return journal; | 3098 | return journal; |
3099 | |||
3023 | out_journal: | 3100 | out_journal: |
3024 | jbd2_journal_destroy(journal); | 3101 | jbd2_journal_destroy(journal); |
3025 | out_bdev: | 3102 | out_bdev: |
@@ -3041,8 +3118,8 @@ static int ext4_load_journal(struct super_block *sb, | |||
3041 | 3118 | ||
3042 | if (journal_devnum && | 3119 | if (journal_devnum && |
3043 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 3120 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
3044 | printk(KERN_INFO "EXT4-fs: external journal device major/minor " | 3121 | ext4_msg(sb, KERN_INFO, "external journal device major/minor " |
3045 | "numbers have changed\n"); | 3122 | "numbers have changed"); |
3046 | journal_dev = new_decode_dev(journal_devnum); | 3123 | journal_dev = new_decode_dev(journal_devnum); |
3047 | } else | 3124 | } else |
3048 | journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); | 3125 | journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); |
@@ -3054,24 +3131,23 @@ static int ext4_load_journal(struct super_block *sb, | |||
3054 | * crash? For recovery, we need to check in advance whether we | 3131 | * crash? For recovery, we need to check in advance whether we |
3055 | * can get read-write access to the device. | 3132 | * can get read-write access to the device. |
3056 | */ | 3133 | */ |
3057 | |||
3058 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 3134 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
3059 | if (sb->s_flags & MS_RDONLY) { | 3135 | if (sb->s_flags & MS_RDONLY) { |
3060 | printk(KERN_INFO "EXT4-fs: INFO: recovery " | 3136 | ext4_msg(sb, KERN_INFO, "INFO: recovery " |
3061 | "required on readonly filesystem.\n"); | 3137 | "required on readonly filesystem"); |
3062 | if (really_read_only) { | 3138 | if (really_read_only) { |
3063 | printk(KERN_ERR "EXT4-fs: write access " | 3139 | ext4_msg(sb, KERN_ERR, "write access " |
3064 | "unavailable, cannot proceed.\n"); | 3140 | "unavailable, cannot proceed"); |
3065 | return -EROFS; | 3141 | return -EROFS; |
3066 | } | 3142 | } |
3067 | printk(KERN_INFO "EXT4-fs: write access will " | 3143 | ext4_msg(sb, KERN_INFO, "write access will " |
3068 | "be enabled during recovery.\n"); | 3144 | "be enabled during recovery"); |
3069 | } | 3145 | } |
3070 | } | 3146 | } |
3071 | 3147 | ||
3072 | if (journal_inum && journal_dev) { | 3148 | if (journal_inum && journal_dev) { |
3073 | printk(KERN_ERR "EXT4-fs: filesystem has both journal " | 3149 | ext4_msg(sb, KERN_ERR, "filesystem has both journal " |
3074 | "and inode journals!\n"); | 3150 | "and inode journals!"); |
3075 | return -EINVAL; | 3151 | return -EINVAL; |
3076 | } | 3152 | } |
3077 | 3153 | ||
@@ -3084,14 +3160,14 @@ static int ext4_load_journal(struct super_block *sb, | |||
3084 | } | 3160 | } |
3085 | 3161 | ||
3086 | if (journal->j_flags & JBD2_BARRIER) | 3162 | if (journal->j_flags & JBD2_BARRIER) |
3087 | printk(KERN_INFO "EXT4-fs: barriers enabled\n"); | 3163 | ext4_msg(sb, KERN_INFO, "barriers enabled"); |
3088 | else | 3164 | else |
3089 | printk(KERN_INFO "EXT4-fs: barriers disabled\n"); | 3165 | ext4_msg(sb, KERN_INFO, "barriers disabled"); |
3090 | 3166 | ||
3091 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | 3167 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { |
3092 | err = jbd2_journal_update_format(journal); | 3168 | err = jbd2_journal_update_format(journal); |
3093 | if (err) { | 3169 | if (err) { |
3094 | printk(KERN_ERR "EXT4-fs: error updating journal.\n"); | 3170 | ext4_msg(sb, KERN_ERR, "error updating journal"); |
3095 | jbd2_journal_destroy(journal); | 3171 | jbd2_journal_destroy(journal); |
3096 | return err; | 3172 | return err; |
3097 | } | 3173 | } |
@@ -3103,7 +3179,7 @@ static int ext4_load_journal(struct super_block *sb, | |||
3103 | err = jbd2_journal_load(journal); | 3179 | err = jbd2_journal_load(journal); |
3104 | 3180 | ||
3105 | if (err) { | 3181 | if (err) { |
3106 | printk(KERN_ERR "EXT4-fs: error loading journal.\n"); | 3182 | ext4_msg(sb, KERN_ERR, "error loading journal"); |
3107 | jbd2_journal_destroy(journal); | 3183 | jbd2_journal_destroy(journal); |
3108 | return err; | 3184 | return err; |
3109 | } | 3185 | } |
@@ -3114,18 +3190,17 @@ static int ext4_load_journal(struct super_block *sb, | |||
3114 | if (journal_devnum && | 3190 | if (journal_devnum && |
3115 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 3191 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
3116 | es->s_journal_dev = cpu_to_le32(journal_devnum); | 3192 | es->s_journal_dev = cpu_to_le32(journal_devnum); |
3117 | sb->s_dirt = 1; | ||
3118 | 3193 | ||
3119 | /* Make sure we flush the recovery flag to disk. */ | 3194 | /* Make sure we flush the recovery flag to disk. */ |
3120 | ext4_commit_super(sb, es, 1); | 3195 | ext4_commit_super(sb, 1); |
3121 | } | 3196 | } |
3122 | 3197 | ||
3123 | return 0; | 3198 | return 0; |
3124 | } | 3199 | } |
3125 | 3200 | ||
3126 | static int ext4_commit_super(struct super_block *sb, | 3201 | static int ext4_commit_super(struct super_block *sb, int sync) |
3127 | struct ext4_super_block *es, int sync) | ||
3128 | { | 3202 | { |
3203 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
3129 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; | 3204 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; |
3130 | int error = 0; | 3205 | int error = 0; |
3131 | 3206 | ||
@@ -3140,8 +3215,8 @@ static int ext4_commit_super(struct super_block *sb, | |||
3140 | * be remapped. Nothing we can do but to retry the | 3215 | * be remapped. Nothing we can do but to retry the |
3141 | * write and hope for the best. | 3216 | * write and hope for the best. |
3142 | */ | 3217 | */ |
3143 | printk(KERN_ERR "EXT4-fs: previous I/O error to " | 3218 | ext4_msg(sb, KERN_ERR, "previous I/O error to " |
3144 | "superblock detected for %s.\n", sb->s_id); | 3219 | "superblock detected"); |
3145 | clear_buffer_write_io_error(sbh); | 3220 | clear_buffer_write_io_error(sbh); |
3146 | set_buffer_uptodate(sbh); | 3221 | set_buffer_uptodate(sbh); |
3147 | } | 3222 | } |
@@ -3154,7 +3229,7 @@ static int ext4_commit_super(struct super_block *sb, | |||
3154 | &EXT4_SB(sb)->s_freeblocks_counter)); | 3229 | &EXT4_SB(sb)->s_freeblocks_counter)); |
3155 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( | 3230 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( |
3156 | &EXT4_SB(sb)->s_freeinodes_counter)); | 3231 | &EXT4_SB(sb)->s_freeinodes_counter)); |
3157 | 3232 | sb->s_dirt = 0; | |
3158 | BUFFER_TRACE(sbh, "marking dirty"); | 3233 | BUFFER_TRACE(sbh, "marking dirty"); |
3159 | mark_buffer_dirty(sbh); | 3234 | mark_buffer_dirty(sbh); |
3160 | if (sync) { | 3235 | if (sync) { |
@@ -3164,8 +3239,8 @@ static int ext4_commit_super(struct super_block *sb, | |||
3164 | 3239 | ||
3165 | error = buffer_write_io_error(sbh); | 3240 | error = buffer_write_io_error(sbh); |
3166 | if (error) { | 3241 | if (error) { |
3167 | printk(KERN_ERR "EXT4-fs: I/O error while writing " | 3242 | ext4_msg(sb, KERN_ERR, "I/O error while writing " |
3168 | "superblock for %s.\n", sb->s_id); | 3243 | "superblock"); |
3169 | clear_buffer_write_io_error(sbh); | 3244 | clear_buffer_write_io_error(sbh); |
3170 | set_buffer_uptodate(sbh); | 3245 | set_buffer_uptodate(sbh); |
3171 | } | 3246 | } |
@@ -3173,7 +3248,6 @@ static int ext4_commit_super(struct super_block *sb, | |||
3173 | return error; | 3248 | return error; |
3174 | } | 3249 | } |
3175 | 3250 | ||
3176 | |||
3177 | /* | 3251 | /* |
3178 | * Have we just finished recovery? If so, and if we are mounting (or | 3252 | * Have we just finished recovery? If so, and if we are mounting (or |
3179 | * remounting) the filesystem readonly, then we will end up with a | 3253 | * remounting) the filesystem readonly, then we will end up with a |
@@ -3192,14 +3266,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
3192 | if (jbd2_journal_flush(journal) < 0) | 3266 | if (jbd2_journal_flush(journal) < 0) |
3193 | goto out; | 3267 | goto out; |
3194 | 3268 | ||
3195 | lock_super(sb); | ||
3196 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 3269 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
3197 | sb->s_flags & MS_RDONLY) { | 3270 | sb->s_flags & MS_RDONLY) { |
3198 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3271 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
3199 | sb->s_dirt = 0; | 3272 | ext4_commit_super(sb, 1); |
3200 | ext4_commit_super(sb, es, 1); | ||
3201 | } | 3273 | } |
3202 | unlock_super(sb); | ||
3203 | 3274 | ||
3204 | out: | 3275 | out: |
3205 | jbd2_journal_unlock_updates(journal); | 3276 | jbd2_journal_unlock_updates(journal); |
@@ -3238,7 +3309,7 @@ static void ext4_clear_journal_err(struct super_block *sb, | |||
3238 | 3309 | ||
3239 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 3310 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
3240 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 3311 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
3241 | ext4_commit_super(sb, es, 1); | 3312 | ext4_commit_super(sb, 1); |
3242 | 3313 | ||
3243 | jbd2_journal_clear_err(journal); | 3314 | jbd2_journal_clear_err(journal); |
3244 | } | 3315 | } |
@@ -3257,29 +3328,17 @@ int ext4_force_commit(struct super_block *sb) | |||
3257 | return 0; | 3328 | return 0; |
3258 | 3329 | ||
3259 | journal = EXT4_SB(sb)->s_journal; | 3330 | journal = EXT4_SB(sb)->s_journal; |
3260 | if (journal) { | 3331 | if (journal) |
3261 | sb->s_dirt = 0; | ||
3262 | ret = ext4_journal_force_commit(journal); | 3332 | ret = ext4_journal_force_commit(journal); |
3263 | } | ||
3264 | 3333 | ||
3265 | return ret; | 3334 | return ret; |
3266 | } | 3335 | } |
3267 | 3336 | ||
3268 | /* | ||
3269 | * Ext4 always journals updates to the superblock itself, so we don't | ||
3270 | * have to propagate any other updates to the superblock on disk at this | ||
3271 | * point. (We can probably nuke this function altogether, and remove | ||
3272 | * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) | ||
3273 | */ | ||
3274 | static void ext4_write_super(struct super_block *sb) | 3337 | static void ext4_write_super(struct super_block *sb) |
3275 | { | 3338 | { |
3276 | if (EXT4_SB(sb)->s_journal) { | 3339 | lock_super(sb); |
3277 | if (mutex_trylock(&sb->s_lock) != 0) | 3340 | ext4_commit_super(sb, 1); |
3278 | BUG(); | 3341 | unlock_super(sb); |
3279 | sb->s_dirt = 0; | ||
3280 | } else { | ||
3281 | ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); | ||
3282 | } | ||
3283 | } | 3342 | } |
3284 | 3343 | ||
3285 | static int ext4_sync_fs(struct super_block *sb, int wait) | 3344 | static int ext4_sync_fs(struct super_block *sb, int wait) |
@@ -3288,16 +3347,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
3288 | tid_t target; | 3347 | tid_t target; |
3289 | 3348 | ||
3290 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | 3349 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); |
3291 | sb->s_dirt = 0; | 3350 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { |
3292 | if (EXT4_SB(sb)->s_journal) { | 3351 | if (wait) |
3293 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, | 3352 | jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); |
3294 | &target)) { | ||
3295 | if (wait) | ||
3296 | jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, | ||
3297 | target); | ||
3298 | } | ||
3299 | } else { | ||
3300 | ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); | ||
3301 | } | 3353 | } |
3302 | return ret; | 3354 | return ret; |
3303 | } | 3355 | } |
@@ -3310,34 +3362,32 @@ static int ext4_freeze(struct super_block *sb) | |||
3310 | { | 3362 | { |
3311 | int error = 0; | 3363 | int error = 0; |
3312 | journal_t *journal; | 3364 | journal_t *journal; |
3313 | sb->s_dirt = 0; | ||
3314 | 3365 | ||
3315 | if (!(sb->s_flags & MS_RDONLY)) { | 3366 | if (sb->s_flags & MS_RDONLY) |
3316 | journal = EXT4_SB(sb)->s_journal; | 3367 | return 0; |
3317 | 3368 | ||
3318 | if (journal) { | 3369 | journal = EXT4_SB(sb)->s_journal; |
3319 | /* Now we set up the journal barrier. */ | ||
3320 | jbd2_journal_lock_updates(journal); | ||
3321 | 3370 | ||
3322 | /* | 3371 | /* Now we set up the journal barrier. */ |
3323 | * We don't want to clear needs_recovery flag when we | 3372 | jbd2_journal_lock_updates(journal); |
3324 | * failed to flush the journal. | ||
3325 | */ | ||
3326 | error = jbd2_journal_flush(journal); | ||
3327 | if (error < 0) | ||
3328 | goto out; | ||
3329 | } | ||
3330 | 3373 | ||
3331 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 3374 | /* |
3332 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3375 | * Don't clear the needs_recovery flag if we failed to flush |
3333 | error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); | 3376 | * the journal. |
3334 | if (error) | 3377 | */ |
3335 | goto out; | 3378 | error = jbd2_journal_flush(journal); |
3379 | if (error < 0) { | ||
3380 | out: | ||
3381 | jbd2_journal_unlock_updates(journal); | ||
3382 | return error; | ||
3336 | } | 3383 | } |
3384 | |||
3385 | /* Journal blocked and flushed, clear needs_recovery flag. */ | ||
3386 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | ||
3387 | error = ext4_commit_super(sb, 1); | ||
3388 | if (error) | ||
3389 | goto out; | ||
3337 | return 0; | 3390 | return 0; |
3338 | out: | ||
3339 | jbd2_journal_unlock_updates(journal); | ||
3340 | return error; | ||
3341 | } | 3391 | } |
3342 | 3392 | ||
3343 | /* | 3393 | /* |
@@ -3346,14 +3396,15 @@ out: | |||
3346 | */ | 3396 | */ |
3347 | static int ext4_unfreeze(struct super_block *sb) | 3397 | static int ext4_unfreeze(struct super_block *sb) |
3348 | { | 3398 | { |
3349 | if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { | 3399 | if (sb->s_flags & MS_RDONLY) |
3350 | lock_super(sb); | 3400 | return 0; |
3351 | /* Reser the needs_recovery flag before the fs is unlocked. */ | 3401 | |
3352 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3402 | lock_super(sb); |
3353 | ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); | 3403 | /* Reset the needs_recovery flag before the fs is unlocked. */ |
3354 | unlock_super(sb); | 3404 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
3355 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 3405 | ext4_commit_super(sb, 1); |
3356 | } | 3406 | unlock_super(sb); |
3407 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | ||
3357 | return 0; | 3408 | return 0; |
3358 | } | 3409 | } |
3359 | 3410 | ||
@@ -3371,7 +3422,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3371 | int i; | 3422 | int i; |
3372 | #endif | 3423 | #endif |
3373 | 3424 | ||
3425 | lock_kernel(); | ||
3426 | |||
3374 | /* Store the original options */ | 3427 | /* Store the original options */ |
3428 | lock_super(sb); | ||
3375 | old_sb_flags = sb->s_flags; | 3429 | old_sb_flags = sb->s_flags; |
3376 | old_opts.s_mount_opt = sbi->s_mount_opt; | 3430 | old_opts.s_mount_opt = sbi->s_mount_opt; |
3377 | old_opts.s_resuid = sbi->s_resuid; | 3431 | old_opts.s_resuid = sbi->s_resuid; |
@@ -3432,22 +3486,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3432 | (sbi->s_mount_state & EXT4_VALID_FS)) | 3486 | (sbi->s_mount_state & EXT4_VALID_FS)) |
3433 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 3487 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
3434 | 3488 | ||
3435 | /* | 3489 | if (sbi->s_journal) |
3436 | * We have to unlock super so that we can wait for | ||
3437 | * transactions. | ||
3438 | */ | ||
3439 | if (sbi->s_journal) { | ||
3440 | unlock_super(sb); | ||
3441 | ext4_mark_recovery_complete(sb, es); | 3490 | ext4_mark_recovery_complete(sb, es); |
3442 | lock_super(sb); | ||
3443 | } | ||
3444 | } else { | 3491 | } else { |
3445 | int ret; | 3492 | int ret; |
3446 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3493 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3447 | ~EXT4_FEATURE_RO_COMPAT_SUPP))) { | 3494 | ~EXT4_FEATURE_RO_COMPAT_SUPP))) { |
3448 | printk(KERN_WARNING "EXT4-fs: %s: couldn't " | 3495 | ext4_msg(sb, KERN_WARNING, "couldn't " |
3449 | "remount RDWR because of unsupported " | 3496 | "remount RDWR because of unsupported " |
3450 | "optional features (%x).\n", sb->s_id, | 3497 | "optional features (%x)", |
3451 | (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & | 3498 | (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & |
3452 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | 3499 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); |
3453 | err = -EROFS; | 3500 | err = -EROFS; |
@@ -3456,17 +3503,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3456 | 3503 | ||
3457 | /* | 3504 | /* |
3458 | * Make sure the group descriptor checksums | 3505 | * Make sure the group descriptor checksums |
3459 | * are sane. If they aren't, refuse to | 3506 | * are sane. If they aren't, refuse to remount r/w. |
3460 | * remount r/w. | ||
3461 | */ | 3507 | */ |
3462 | for (g = 0; g < sbi->s_groups_count; g++) { | 3508 | for (g = 0; g < sbi->s_groups_count; g++) { |
3463 | struct ext4_group_desc *gdp = | 3509 | struct ext4_group_desc *gdp = |
3464 | ext4_get_group_desc(sb, g, NULL); | 3510 | ext4_get_group_desc(sb, g, NULL); |
3465 | 3511 | ||
3466 | if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { | 3512 | if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { |
3467 | printk(KERN_ERR | 3513 | ext4_msg(sb, KERN_ERR, |
3468 | "EXT4-fs: ext4_remount: " | 3514 | "ext4_remount: Checksum for group %u failed (%u!=%u)", |
3469 | "Checksum for group %u failed (%u!=%u)\n", | ||
3470 | g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), | 3515 | g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), |
3471 | le16_to_cpu(gdp->bg_checksum)); | 3516 | le16_to_cpu(gdp->bg_checksum)); |
3472 | err = -EINVAL; | 3517 | err = -EINVAL; |
@@ -3480,11 +3525,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3480 | * require a full umount/remount for now. | 3525 | * require a full umount/remount for now. |
3481 | */ | 3526 | */ |
3482 | if (es->s_last_orphan) { | 3527 | if (es->s_last_orphan) { |
3483 | printk(KERN_WARNING "EXT4-fs: %s: couldn't " | 3528 | ext4_msg(sb, KERN_WARNING, "Couldn't " |
3484 | "remount RDWR because of unprocessed " | 3529 | "remount RDWR because of unprocessed " |
3485 | "orphan inode list. Please " | 3530 | "orphan inode list. Please " |
3486 | "umount/remount instead.\n", | 3531 | "umount/remount instead"); |
3487 | sb->s_id); | ||
3488 | err = -EINVAL; | 3532 | err = -EINVAL; |
3489 | goto restore_opts; | 3533 | goto restore_opts; |
3490 | } | 3534 | } |
@@ -3504,8 +3548,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3504 | sb->s_flags &= ~MS_RDONLY; | 3548 | sb->s_flags &= ~MS_RDONLY; |
3505 | } | 3549 | } |
3506 | } | 3550 | } |
3551 | ext4_setup_system_zone(sb); | ||
3507 | if (sbi->s_journal == NULL) | 3552 | if (sbi->s_journal == NULL) |
3508 | ext4_commit_super(sb, es, 1); | 3553 | ext4_commit_super(sb, 1); |
3509 | 3554 | ||
3510 | #ifdef CONFIG_QUOTA | 3555 | #ifdef CONFIG_QUOTA |
3511 | /* Release old quota file names */ | 3556 | /* Release old quota file names */ |
@@ -3514,7 +3559,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3514 | old_opts.s_qf_names[i] != sbi->s_qf_names[i]) | 3559 | old_opts.s_qf_names[i] != sbi->s_qf_names[i]) |
3515 | kfree(old_opts.s_qf_names[i]); | 3560 | kfree(old_opts.s_qf_names[i]); |
3516 | #endif | 3561 | #endif |
3562 | unlock_super(sb); | ||
3563 | unlock_kernel(); | ||
3517 | return 0; | 3564 | return 0; |
3565 | |||
3518 | restore_opts: | 3566 | restore_opts: |
3519 | sb->s_flags = old_sb_flags; | 3567 | sb->s_flags = old_sb_flags; |
3520 | sbi->s_mount_opt = old_opts.s_mount_opt; | 3568 | sbi->s_mount_opt = old_opts.s_mount_opt; |
@@ -3532,6 +3580,8 @@ restore_opts: | |||
3532 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 3580 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
3533 | } | 3581 | } |
3534 | #endif | 3582 | #endif |
3583 | unlock_super(sb); | ||
3584 | unlock_kernel(); | ||
3535 | return err; | 3585 | return err; |
3536 | } | 3586 | } |
3537 | 3587 | ||
@@ -3545,9 +3595,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3545 | if (test_opt(sb, MINIX_DF)) { | 3595 | if (test_opt(sb, MINIX_DF)) { |
3546 | sbi->s_overhead_last = 0; | 3596 | sbi->s_overhead_last = 0; |
3547 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { | 3597 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { |
3548 | ext4_group_t ngroups = sbi->s_groups_count, i; | 3598 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
3549 | ext4_fsblk_t overhead = 0; | 3599 | ext4_fsblk_t overhead = 0; |
3550 | smp_rmb(); | ||
3551 | 3600 | ||
3552 | /* | 3601 | /* |
3553 | * Compute the overhead (FS structures). This is constant | 3602 | * Compute the overhead (FS structures). This is constant |
@@ -3599,11 +3648,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3599 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 3648 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
3600 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; | 3649 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; |
3601 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; | 3650 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; |
3651 | |||
3602 | return 0; | 3652 | return 0; |
3603 | } | 3653 | } |
3604 | 3654 | ||
3605 | /* Helper function for writing quotas on sync - we need to start transaction before quota file | 3655 | /* Helper function for writing quotas on sync - we need to start transaction |
3606 | * is locked for write. Otherwise the are possible deadlocks: | 3656 | * before quota file is locked for write. Otherwise the are possible deadlocks: |
3607 | * Process 1 Process 2 | 3657 | * Process 1 Process 2 |
3608 | * ext4_create() quota_sync() | 3658 | * ext4_create() quota_sync() |
3609 | * jbd2_journal_start() write_dquot() | 3659 | * jbd2_journal_start() write_dquot() |
@@ -3627,7 +3677,7 @@ static int ext4_write_dquot(struct dquot *dquot) | |||
3627 | 3677 | ||
3628 | inode = dquot_to_inode(dquot); | 3678 | inode = dquot_to_inode(dquot); |
3629 | handle = ext4_journal_start(inode, | 3679 | handle = ext4_journal_start(inode, |
3630 | EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); | 3680 | EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); |
3631 | if (IS_ERR(handle)) | 3681 | if (IS_ERR(handle)) |
3632 | return PTR_ERR(handle); | 3682 | return PTR_ERR(handle); |
3633 | ret = dquot_commit(dquot); | 3683 | ret = dquot_commit(dquot); |
@@ -3643,7 +3693,7 @@ static int ext4_acquire_dquot(struct dquot *dquot) | |||
3643 | handle_t *handle; | 3693 | handle_t *handle; |
3644 | 3694 | ||
3645 | handle = ext4_journal_start(dquot_to_inode(dquot), | 3695 | handle = ext4_journal_start(dquot_to_inode(dquot), |
3646 | EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); | 3696 | EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); |
3647 | if (IS_ERR(handle)) | 3697 | if (IS_ERR(handle)) |
3648 | return PTR_ERR(handle); | 3698 | return PTR_ERR(handle); |
3649 | ret = dquot_acquire(dquot); | 3699 | ret = dquot_acquire(dquot); |
@@ -3659,7 +3709,7 @@ static int ext4_release_dquot(struct dquot *dquot) | |||
3659 | handle_t *handle; | 3709 | handle_t *handle; |
3660 | 3710 | ||
3661 | handle = ext4_journal_start(dquot_to_inode(dquot), | 3711 | handle = ext4_journal_start(dquot_to_inode(dquot), |
3662 | EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); | 3712 | EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); |
3663 | if (IS_ERR(handle)) { | 3713 | if (IS_ERR(handle)) { |
3664 | /* Release dquot anyway to avoid endless cycle in dqput() */ | 3714 | /* Release dquot anyway to avoid endless cycle in dqput() */ |
3665 | dquot_release(dquot); | 3715 | dquot_release(dquot); |
@@ -3707,7 +3757,7 @@ static int ext4_write_info(struct super_block *sb, int type) | |||
3707 | static int ext4_quota_on_mount(struct super_block *sb, int type) | 3757 | static int ext4_quota_on_mount(struct super_block *sb, int type) |
3708 | { | 3758 | { |
3709 | return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], | 3759 | return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], |
3710 | EXT4_SB(sb)->s_jquota_fmt, type); | 3760 | EXT4_SB(sb)->s_jquota_fmt, type); |
3711 | } | 3761 | } |
3712 | 3762 | ||
3713 | /* | 3763 | /* |
@@ -3738,9 +3788,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3738 | if (EXT4_SB(sb)->s_qf_names[type]) { | 3788 | if (EXT4_SB(sb)->s_qf_names[type]) { |
3739 | /* Quotafile not in fs root? */ | 3789 | /* Quotafile not in fs root? */ |
3740 | if (path.dentry->d_parent != sb->s_root) | 3790 | if (path.dentry->d_parent != sb->s_root) |
3741 | printk(KERN_WARNING | 3791 | ext4_msg(sb, KERN_WARNING, |
3742 | "EXT4-fs: Quota file not on filesystem root. " | 3792 | "Quota file not on filesystem root. " |
3743 | "Journaled quota will not work.\n"); | 3793 | "Journaled quota will not work"); |
3744 | } | 3794 | } |
3745 | 3795 | ||
3746 | /* | 3796 | /* |
@@ -3823,8 +3873,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
3823 | handle_t *handle = journal_current_handle(); | 3873 | handle_t *handle = journal_current_handle(); |
3824 | 3874 | ||
3825 | if (EXT4_SB(sb)->s_journal && !handle) { | 3875 | if (EXT4_SB(sb)->s_journal && !handle) { |
3826 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" | 3876 | ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" |
3827 | " cancelled because transaction is not started.\n", | 3877 | " cancelled because transaction is not started", |
3828 | (unsigned long long)off, (unsigned long long)len); | 3878 | (unsigned long long)off, (unsigned long long)len); |
3829 | return -EIO; | 3879 | return -EIO; |
3830 | } | 3880 | } |
@@ -3878,10 +3928,10 @@ out: | |||
3878 | 3928 | ||
3879 | #endif | 3929 | #endif |
3880 | 3930 | ||
3881 | static int ext4_get_sb(struct file_system_type *fs_type, | 3931 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, |
3882 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 3932 | const char *dev_name, void *data, struct vfsmount *mnt) |
3883 | { | 3933 | { |
3884 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3934 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); |
3885 | } | 3935 | } |
3886 | 3936 | ||
3887 | static struct file_system_type ext4_fs_type = { | 3937 | static struct file_system_type ext4_fs_type = { |
@@ -3893,14 +3943,14 @@ static struct file_system_type ext4_fs_type = { | |||
3893 | }; | 3943 | }; |
3894 | 3944 | ||
3895 | #ifdef CONFIG_EXT4DEV_COMPAT | 3945 | #ifdef CONFIG_EXT4DEV_COMPAT |
3896 | static int ext4dev_get_sb(struct file_system_type *fs_type, | 3946 | static int ext4dev_get_sb(struct file_system_type *fs_type, int flags, |
3897 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 3947 | const char *dev_name, void *data,struct vfsmount *mnt) |
3898 | { | 3948 | { |
3899 | printk(KERN_WARNING "EXT4-fs: Update your userspace programs " | 3949 | printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs " |
3900 | "to mount using ext4\n"); | 3950 | "to mount using ext4\n", dev_name); |
3901 | printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " | 3951 | printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility " |
3902 | "will go away by 2.6.31\n"); | 3952 | "will go away by 2.6.31\n", dev_name); |
3903 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3953 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); |
3904 | } | 3954 | } |
3905 | 3955 | ||
3906 | static struct file_system_type ext4dev_fs_type = { | 3956 | static struct file_system_type ext4dev_fs_type = { |
@@ -3917,13 +3967,16 @@ static int __init init_ext4_fs(void) | |||
3917 | { | 3967 | { |
3918 | int err; | 3968 | int err; |
3919 | 3969 | ||
3970 | err = init_ext4_system_zone(); | ||
3971 | if (err) | ||
3972 | return err; | ||
3920 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 3973 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
3921 | if (!ext4_kset) | 3974 | if (!ext4_kset) |
3922 | return -ENOMEM; | 3975 | goto out4; |
3923 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 3976 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
3924 | err = init_ext4_mballoc(); | 3977 | err = init_ext4_mballoc(); |
3925 | if (err) | 3978 | if (err) |
3926 | return err; | 3979 | goto out3; |
3927 | 3980 | ||
3928 | err = init_ext4_xattr(); | 3981 | err = init_ext4_xattr(); |
3929 | if (err) | 3982 | if (err) |
@@ -3948,6 +4001,11 @@ out1: | |||
3948 | exit_ext4_xattr(); | 4001 | exit_ext4_xattr(); |
3949 | out2: | 4002 | out2: |
3950 | exit_ext4_mballoc(); | 4003 | exit_ext4_mballoc(); |
4004 | out3: | ||
4005 | remove_proc_entry("fs/ext4", NULL); | ||
4006 | kset_unregister(ext4_kset); | ||
4007 | out4: | ||
4008 | exit_ext4_system_zone(); | ||
3951 | return err; | 4009 | return err; |
3952 | } | 4010 | } |
3953 | 4011 | ||
@@ -3962,6 +4020,7 @@ static void __exit exit_ext4_fs(void) | |||
3962 | exit_ext4_mballoc(); | 4020 | exit_ext4_mballoc(); |
3963 | remove_proc_entry("fs/ext4", NULL); | 4021 | remove_proc_entry("fs/ext4", NULL); |
3964 | kset_unregister(ext4_kset); | 4022 | kset_unregister(ext4_kset); |
4023 | exit_ext4_system_zone(); | ||
3965 | } | 4024 | } |
3966 | 4025 | ||
3967 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 4026 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 3a7f603b6982..f3500294eec5 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -840,7 +840,7 @@ const struct file_operations fat_dir_operations = { | |||
840 | #ifdef CONFIG_COMPAT | 840 | #ifdef CONFIG_COMPAT |
841 | .compat_ioctl = fat_compat_dir_ioctl, | 841 | .compat_ioctl = fat_compat_dir_ioctl, |
842 | #endif | 842 | #endif |
843 | .fsync = file_fsync, | 843 | .fsync = fat_file_fsync, |
844 | }; | 844 | }; |
845 | 845 | ||
846 | static int fat_get_short_entry(struct inode *dir, loff_t *pos, | 846 | static int fat_get_short_entry(struct inode *dir, loff_t *pos, |
@@ -967,7 +967,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) | |||
967 | de++; | 967 | de++; |
968 | nr_slots--; | 968 | nr_slots--; |
969 | } | 969 | } |
970 | mark_buffer_dirty(bh); | 970 | mark_buffer_dirty_inode(bh, dir); |
971 | if (IS_DIRSYNC(dir)) | 971 | if (IS_DIRSYNC(dir)) |
972 | err = sync_dirty_buffer(bh); | 972 | err = sync_dirty_buffer(bh); |
973 | brelse(bh); | 973 | brelse(bh); |
@@ -1001,7 +1001,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) | |||
1001 | de--; | 1001 | de--; |
1002 | nr_slots--; | 1002 | nr_slots--; |
1003 | } | 1003 | } |
1004 | mark_buffer_dirty(bh); | 1004 | mark_buffer_dirty_inode(bh, dir); |
1005 | if (IS_DIRSYNC(dir)) | 1005 | if (IS_DIRSYNC(dir)) |
1006 | err = sync_dirty_buffer(bh); | 1006 | err = sync_dirty_buffer(bh); |
1007 | brelse(bh); | 1007 | brelse(bh); |
@@ -1051,7 +1051,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, | |||
1051 | } | 1051 | } |
1052 | memset(bhs[n]->b_data, 0, sb->s_blocksize); | 1052 | memset(bhs[n]->b_data, 0, sb->s_blocksize); |
1053 | set_buffer_uptodate(bhs[n]); | 1053 | set_buffer_uptodate(bhs[n]); |
1054 | mark_buffer_dirty(bhs[n]); | 1054 | mark_buffer_dirty_inode(bhs[n], dir); |
1055 | 1055 | ||
1056 | n++; | 1056 | n++; |
1057 | blknr++; | 1057 | blknr++; |
@@ -1131,7 +1131,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) | |||
1131 | de[0].size = de[1].size = 0; | 1131 | de[0].size = de[1].size = 0; |
1132 | memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); | 1132 | memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); |
1133 | set_buffer_uptodate(bhs[0]); | 1133 | set_buffer_uptodate(bhs[0]); |
1134 | mark_buffer_dirty(bhs[0]); | 1134 | mark_buffer_dirty_inode(bhs[0], dir); |
1135 | 1135 | ||
1136 | err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE); | 1136 | err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE); |
1137 | if (err) | 1137 | if (err) |
@@ -1193,7 +1193,7 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, | |||
1193 | slots += copy; | 1193 | slots += copy; |
1194 | size -= copy; | 1194 | size -= copy; |
1195 | set_buffer_uptodate(bhs[n]); | 1195 | set_buffer_uptodate(bhs[n]); |
1196 | mark_buffer_dirty(bhs[n]); | 1196 | mark_buffer_dirty_inode(bhs[n], dir); |
1197 | if (!size) | 1197 | if (!size) |
1198 | break; | 1198 | break; |
1199 | n++; | 1199 | n++; |
@@ -1293,7 +1293,7 @@ found: | |||
1293 | for (i = 0; i < long_bhs; i++) { | 1293 | for (i = 0; i < long_bhs; i++) { |
1294 | int copy = min_t(int, sb->s_blocksize - offset, size); | 1294 | int copy = min_t(int, sb->s_blocksize - offset, size); |
1295 | memcpy(bhs[i]->b_data + offset, slots, copy); | 1295 | memcpy(bhs[i]->b_data + offset, slots, copy); |
1296 | mark_buffer_dirty(bhs[i]); | 1296 | mark_buffer_dirty_inode(bhs[i], dir); |
1297 | offset = 0; | 1297 | offset = 0; |
1298 | slots += copy; | 1298 | slots += copy; |
1299 | size -= copy; | 1299 | size -= copy; |
@@ -1304,7 +1304,7 @@ found: | |||
1304 | /* Fill the short name slot. */ | 1304 | /* Fill the short name slot. */ |
1305 | int copy = min_t(int, sb->s_blocksize - offset, size); | 1305 | int copy = min_t(int, sb->s_blocksize - offset, size); |
1306 | memcpy(bhs[i]->b_data + offset, slots, copy); | 1306 | memcpy(bhs[i]->b_data + offset, slots, copy); |
1307 | mark_buffer_dirty(bhs[i]); | 1307 | mark_buffer_dirty_inode(bhs[i], dir); |
1308 | if (IS_DIRSYNC(dir)) | 1308 | if (IS_DIRSYNC(dir)) |
1309 | err = sync_dirty_buffer(bhs[i]); | 1309 | err = sync_dirty_buffer(bhs[i]); |
1310 | } | 1310 | } |
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index ea440d65819c..e4d88527b5dd 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
@@ -74,6 +74,7 @@ struct msdos_sb_info { | |||
74 | 74 | ||
75 | int fatent_shift; | 75 | int fatent_shift; |
76 | struct fatent_operations *fatent_ops; | 76 | struct fatent_operations *fatent_ops; |
77 | struct inode *fat_inode; | ||
77 | 78 | ||
78 | spinlock_t inode_hash_lock; | 79 | spinlock_t inode_hash_lock; |
79 | struct hlist_head inode_hashtable[FAT_HASH_SIZE]; | 80 | struct hlist_head inode_hashtable[FAT_HASH_SIZE]; |
@@ -251,6 +252,7 @@ struct fat_entry { | |||
251 | } u; | 252 | } u; |
252 | int nr_bhs; | 253 | int nr_bhs; |
253 | struct buffer_head *bhs[2]; | 254 | struct buffer_head *bhs[2]; |
255 | struct inode *fat_inode; | ||
254 | }; | 256 | }; |
255 | 257 | ||
256 | static inline void fatent_init(struct fat_entry *fatent) | 258 | static inline void fatent_init(struct fat_entry *fatent) |
@@ -259,6 +261,7 @@ static inline void fatent_init(struct fat_entry *fatent) | |||
259 | fatent->entry = 0; | 261 | fatent->entry = 0; |
260 | fatent->u.ent32_p = NULL; | 262 | fatent->u.ent32_p = NULL; |
261 | fatent->bhs[0] = fatent->bhs[1] = NULL; | 263 | fatent->bhs[0] = fatent->bhs[1] = NULL; |
264 | fatent->fat_inode = NULL; | ||
262 | } | 265 | } |
263 | 266 | ||
264 | static inline void fatent_set_entry(struct fat_entry *fatent, int entry) | 267 | static inline void fatent_set_entry(struct fat_entry *fatent, int entry) |
@@ -275,6 +278,7 @@ static inline void fatent_brelse(struct fat_entry *fatent) | |||
275 | brelse(fatent->bhs[i]); | 278 | brelse(fatent->bhs[i]); |
276 | fatent->nr_bhs = 0; | 279 | fatent->nr_bhs = 0; |
277 | fatent->bhs[0] = fatent->bhs[1] = NULL; | 280 | fatent->bhs[0] = fatent->bhs[1] = NULL; |
281 | fatent->fat_inode = NULL; | ||
278 | } | 282 | } |
279 | 283 | ||
280 | extern void fat_ent_access_init(struct super_block *sb); | 284 | extern void fat_ent_access_init(struct super_block *sb); |
@@ -296,6 +300,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr); | |||
296 | extern void fat_truncate(struct inode *inode); | 300 | extern void fat_truncate(struct inode *inode); |
297 | extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, | 301 | extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, |
298 | struct kstat *stat); | 302 | struct kstat *stat); |
303 | extern int fat_file_fsync(struct file *file, struct dentry *dentry, | ||
304 | int datasync); | ||
299 | 305 | ||
300 | /* fat/inode.c */ | 306 | /* fat/inode.c */ |
301 | extern void fat_attach(struct inode *inode, loff_t i_pos); | 307 | extern void fat_attach(struct inode *inode, loff_t i_pos); |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index da6eea47872f..618f5305c2e4 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -73,6 +73,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, | |||
73 | struct buffer_head **bhs = fatent->bhs; | 73 | struct buffer_head **bhs = fatent->bhs; |
74 | 74 | ||
75 | WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); | 75 | WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); |
76 | fatent->fat_inode = MSDOS_SB(sb)->fat_inode; | ||
77 | |||
76 | bhs[0] = sb_bread(sb, blocknr); | 78 | bhs[0] = sb_bread(sb, blocknr); |
77 | if (!bhs[0]) | 79 | if (!bhs[0]) |
78 | goto err; | 80 | goto err; |
@@ -103,6 +105,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, | |||
103 | struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; | 105 | struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; |
104 | 106 | ||
105 | WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); | 107 | WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); |
108 | fatent->fat_inode = MSDOS_SB(sb)->fat_inode; | ||
106 | fatent->bhs[0] = sb_bread(sb, blocknr); | 109 | fatent->bhs[0] = sb_bread(sb, blocknr); |
107 | if (!fatent->bhs[0]) { | 110 | if (!fatent->bhs[0]) { |
108 | printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", | 111 | printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", |
@@ -167,9 +170,9 @@ static void fat12_ent_put(struct fat_entry *fatent, int new) | |||
167 | } | 170 | } |
168 | spin_unlock(&fat12_entry_lock); | 171 | spin_unlock(&fat12_entry_lock); |
169 | 172 | ||
170 | mark_buffer_dirty(fatent->bhs[0]); | 173 | mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); |
171 | if (fatent->nr_bhs == 2) | 174 | if (fatent->nr_bhs == 2) |
172 | mark_buffer_dirty(fatent->bhs[1]); | 175 | mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode); |
173 | } | 176 | } |
174 | 177 | ||
175 | static void fat16_ent_put(struct fat_entry *fatent, int new) | 178 | static void fat16_ent_put(struct fat_entry *fatent, int new) |
@@ -178,7 +181,7 @@ static void fat16_ent_put(struct fat_entry *fatent, int new) | |||
178 | new = EOF_FAT16; | 181 | new = EOF_FAT16; |
179 | 182 | ||
180 | *fatent->u.ent16_p = cpu_to_le16(new); | 183 | *fatent->u.ent16_p = cpu_to_le16(new); |
181 | mark_buffer_dirty(fatent->bhs[0]); | 184 | mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); |
182 | } | 185 | } |
183 | 186 | ||
184 | static void fat32_ent_put(struct fat_entry *fatent, int new) | 187 | static void fat32_ent_put(struct fat_entry *fatent, int new) |
@@ -189,7 +192,7 @@ static void fat32_ent_put(struct fat_entry *fatent, int new) | |||
189 | WARN_ON(new & 0xf0000000); | 192 | WARN_ON(new & 0xf0000000); |
190 | new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; | 193 | new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; |
191 | *fatent->u.ent32_p = cpu_to_le32(new); | 194 | *fatent->u.ent32_p = cpu_to_le32(new); |
192 | mark_buffer_dirty(fatent->bhs[0]); | 195 | mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); |
193 | } | 196 | } |
194 | 197 | ||
195 | static int fat12_ent_next(struct fat_entry *fatent) | 198 | static int fat12_ent_next(struct fat_entry *fatent) |
@@ -381,7 +384,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, | |||
381 | } | 384 | } |
382 | memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); | 385 | memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); |
383 | set_buffer_uptodate(c_bh); | 386 | set_buffer_uptodate(c_bh); |
384 | mark_buffer_dirty(c_bh); | 387 | mark_buffer_dirty_inode(c_bh, sbi->fat_inode); |
385 | if (sb->s_flags & MS_SYNCHRONOUS) | 388 | if (sb->s_flags & MS_SYNCHRONOUS) |
386 | err = sync_dirty_buffer(c_bh); | 389 | err = sync_dirty_buffer(c_bh); |
387 | brelse(c_bh); | 390 | brelse(c_bh); |
diff --git a/fs/fat/file.c b/fs/fat/file.c index 0a7f4a9918b3..e955a56b4e5e 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -133,6 +133,18 @@ static int fat_file_release(struct inode *inode, struct file *filp) | |||
133 | return 0; | 133 | return 0; |
134 | } | 134 | } |
135 | 135 | ||
136 | int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync) | ||
137 | { | ||
138 | struct inode *inode = dentry->d_inode; | ||
139 | int res, err; | ||
140 | |||
141 | res = simple_fsync(filp, dentry, datasync); | ||
142 | err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); | ||
143 | |||
144 | return res ? res : err; | ||
145 | } | ||
146 | |||
147 | |||
136 | const struct file_operations fat_file_operations = { | 148 | const struct file_operations fat_file_operations = { |
137 | .llseek = generic_file_llseek, | 149 | .llseek = generic_file_llseek, |
138 | .read = do_sync_read, | 150 | .read = do_sync_read, |
@@ -142,7 +154,7 @@ const struct file_operations fat_file_operations = { | |||
142 | .mmap = generic_file_mmap, | 154 | .mmap = generic_file_mmap, |
143 | .release = fat_file_release, | 155 | .release = fat_file_release, |
144 | .ioctl = fat_generic_ioctl, | 156 | .ioctl = fat_generic_ioctl, |
145 | .fsync = file_fsync, | 157 | .fsync = fat_file_fsync, |
146 | .splice_read = generic_file_splice_read, | 158 | .splice_read = generic_file_splice_read, |
147 | }; | 159 | }; |
148 | 160 | ||
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 296785a0dec8..51a5ecf9000a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -441,16 +441,35 @@ static void fat_clear_inode(struct inode *inode) | |||
441 | 441 | ||
442 | static void fat_write_super(struct super_block *sb) | 442 | static void fat_write_super(struct super_block *sb) |
443 | { | 443 | { |
444 | lock_super(sb); | ||
444 | sb->s_dirt = 0; | 445 | sb->s_dirt = 0; |
445 | 446 | ||
446 | if (!(sb->s_flags & MS_RDONLY)) | 447 | if (!(sb->s_flags & MS_RDONLY)) |
447 | fat_clusters_flush(sb); | 448 | fat_clusters_flush(sb); |
449 | unlock_super(sb); | ||
450 | } | ||
451 | |||
452 | static int fat_sync_fs(struct super_block *sb, int wait) | ||
453 | { | ||
454 | lock_super(sb); | ||
455 | fat_clusters_flush(sb); | ||
456 | sb->s_dirt = 0; | ||
457 | unlock_super(sb); | ||
458 | |||
459 | return 0; | ||
448 | } | 460 | } |
449 | 461 | ||
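For orientation (a sketch, not from the patch): ->sync_fs is the superblock hook driven by sync(2), so once fat_sync_fs() is wired up below, an ordinary sync() flushes the free-cluster accounting even when s_dirt is already clear:

/* Sketch: sync(2) walks the superblocks and, for FAT mounts,
 * ends up in fat_sync_fs(sb, wait) via sb->s_op->sync_fs. */
#include <unistd.h>

int main(void)
{
	sync();
	return 0;
}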
450 | static void fat_put_super(struct super_block *sb) | 462 | static void fat_put_super(struct super_block *sb) |
451 | { | 463 | { |
452 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 464 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
453 | 465 | ||
466 | lock_kernel(); | ||
467 | |||
468 | if (sb->s_dirt) | ||
469 | fat_write_super(sb); | ||
470 | |||
471 | iput(sbi->fat_inode); | ||
472 | |||
454 | if (sbi->nls_disk) { | 473 | if (sbi->nls_disk) { |
455 | unload_nls(sbi->nls_disk); | 474 | unload_nls(sbi->nls_disk); |
456 | sbi->nls_disk = NULL; | 475 | sbi->nls_disk = NULL; |
@@ -467,6 +486,8 @@ static void fat_put_super(struct super_block *sb) | |||
467 | 486 | ||
468 | sb->s_fs_info = NULL; | 487 | sb->s_fs_info = NULL; |
469 | kfree(sbi); | 488 | kfree(sbi); |
489 | |||
490 | unlock_kernel(); | ||
470 | } | 491 | } |
471 | 492 | ||
472 | static struct kmem_cache *fat_inode_cachep; | 493 | static struct kmem_cache *fat_inode_cachep; |
@@ -632,6 +653,7 @@ static const struct super_operations fat_sops = { | |||
632 | .delete_inode = fat_delete_inode, | 653 | .delete_inode = fat_delete_inode, |
633 | .put_super = fat_put_super, | 654 | .put_super = fat_put_super, |
634 | .write_super = fat_write_super, | 655 | .write_super = fat_write_super, |
656 | .sync_fs = fat_sync_fs, | ||
635 | .statfs = fat_statfs, | 657 | .statfs = fat_statfs, |
636 | .clear_inode = fat_clear_inode, | 658 | .clear_inode = fat_clear_inode, |
637 | .remount_fs = fat_remount, | 659 | .remount_fs = fat_remount, |
@@ -1174,7 +1196,7 @@ static int fat_read_root(struct inode *inode) | |||
1174 | int fat_fill_super(struct super_block *sb, void *data, int silent, | 1196 | int fat_fill_super(struct super_block *sb, void *data, int silent, |
1175 | const struct inode_operations *fs_dir_inode_ops, int isvfat) | 1197 | const struct inode_operations *fs_dir_inode_ops, int isvfat) |
1176 | { | 1198 | { |
1177 | struct inode *root_inode = NULL; | 1199 | struct inode *root_inode = NULL, *fat_inode = NULL; |
1178 | struct buffer_head *bh; | 1200 | struct buffer_head *bh; |
1179 | struct fat_boot_sector *b; | 1201 | struct fat_boot_sector *b; |
1180 | struct msdos_sb_info *sbi; | 1202 | struct msdos_sb_info *sbi; |
@@ -1414,6 +1436,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
1414 | } | 1436 | } |
1415 | 1437 | ||
1416 | error = -ENOMEM; | 1438 | error = -ENOMEM; |
1439 | fat_inode = new_inode(sb); | ||
1440 | if (!fat_inode) | ||
1441 | goto out_fail; | ||
1442 | MSDOS_I(fat_inode)->i_pos = 0; | ||
1443 | sbi->fat_inode = fat_inode; | ||
1417 | root_inode = new_inode(sb); | 1444 | root_inode = new_inode(sb); |
1418 | if (!root_inode) | 1445 | if (!root_inode) |
1419 | goto out_fail; | 1446 | goto out_fail; |
@@ -1439,6 +1466,8 @@ out_invalid: | |||
1439 | " on dev %s.\n", sb->s_id); | 1466 | " on dev %s.\n", sb->s_id); |
1440 | 1467 | ||
1441 | out_fail: | 1468 | out_fail: |
1469 | if (fat_inode) | ||
1470 | iput(fat_inode); | ||
1442 | if (root_inode) | 1471 | if (root_inode) |
1443 | iput(root_inode); | 1472 | iput(root_inode); |
1444 | if (sbi->nls_io) | 1473 | if (sbi->nls_io) |
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index da3f361a37dd..20f522861355 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
@@ -544,7 +544,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, | |||
544 | int start = MSDOS_I(new_dir)->i_logstart; | 544 | int start = MSDOS_I(new_dir)->i_logstart; |
545 | dotdot_de->start = cpu_to_le16(start); | 545 | dotdot_de->start = cpu_to_le16(start); |
546 | dotdot_de->starthi = cpu_to_le16(start >> 16); | 546 | dotdot_de->starthi = cpu_to_le16(start >> 16); |
547 | mark_buffer_dirty(dotdot_bh); | 547 | mark_buffer_dirty_inode(dotdot_bh, old_inode); |
548 | if (IS_DIRSYNC(new_dir)) { | 548 | if (IS_DIRSYNC(new_dir)) { |
549 | err = sync_dirty_buffer(dotdot_bh); | 549 | err = sync_dirty_buffer(dotdot_bh); |
550 | if (err) | 550 | if (err) |
@@ -586,7 +586,7 @@ error_dotdot: | |||
586 | int start = MSDOS_I(old_dir)->i_logstart; | 586 | int start = MSDOS_I(old_dir)->i_logstart; |
587 | dotdot_de->start = cpu_to_le16(start); | 587 | dotdot_de->start = cpu_to_le16(start); |
588 | dotdot_de->starthi = cpu_to_le16(start >> 16); | 588 | dotdot_de->starthi = cpu_to_le16(start >> 16); |
589 | mark_buffer_dirty(dotdot_bh); | 589 | mark_buffer_dirty_inode(dotdot_bh, old_inode); |
590 | corrupt |= sync_dirty_buffer(dotdot_bh); | 590 | corrupt |= sync_dirty_buffer(dotdot_bh); |
591 | } | 591 | } |
592 | error_inode: | 592 | error_inode: |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index a0e00e3a46e9..b50ecbe97f83 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -965,7 +965,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
965 | int start = MSDOS_I(new_dir)->i_logstart; | 965 | int start = MSDOS_I(new_dir)->i_logstart; |
966 | dotdot_de->start = cpu_to_le16(start); | 966 | dotdot_de->start = cpu_to_le16(start); |
967 | dotdot_de->starthi = cpu_to_le16(start >> 16); | 967 | dotdot_de->starthi = cpu_to_le16(start >> 16); |
968 | mark_buffer_dirty(dotdot_bh); | 968 | mark_buffer_dirty_inode(dotdot_bh, old_inode); |
969 | if (IS_DIRSYNC(new_dir)) { | 969 | if (IS_DIRSYNC(new_dir)) { |
970 | err = sync_dirty_buffer(dotdot_bh); | 970 | err = sync_dirty_buffer(dotdot_bh); |
971 | if (err) | 971 | if (err) |
@@ -1009,7 +1009,7 @@ error_dotdot: | |||
1009 | int start = MSDOS_I(old_dir)->i_logstart; | 1009 | int start = MSDOS_I(old_dir)->i_logstart; |
1010 | dotdot_de->start = cpu_to_le16(start); | 1010 | dotdot_de->start = cpu_to_le16(start); |
1011 | dotdot_de->starthi = cpu_to_le16(start >> 16); | 1011 | dotdot_de->starthi = cpu_to_le16(start >> 16); |
1012 | mark_buffer_dirty(dotdot_bh); | 1012 | mark_buffer_dirty_inode(dotdot_bh, old_inode); |
1013 | corrupt |= sync_dirty_buffer(dotdot_bh); | 1013 | corrupt |= sync_dirty_buffer(dotdot_bh); |
1014 | } | 1014 | } |
1015 | error_inode: | 1015 | error_inode: |
diff --git a/fs/file_table.c b/fs/file_table.c index 54018fe48840..334ce39881f8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, | |||
214 | */ | 214 | */ |
215 | if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { | 215 | if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { |
216 | file_take_write(file); | 216 | file_take_write(file); |
217 | error = mnt_want_write(mnt); | 217 | error = mnt_clone_write(mnt); |
218 | WARN_ON(error); | 218 | WARN_ON(error); |
219 | } | 219 | } |
220 | return error; | 220 | return error; |
@@ -399,6 +399,44 @@ too_bad: | |||
399 | return 0; | 399 | return 0; |
400 | } | 400 | } |
401 | 401 | ||
402 | /** | ||
403 | * mark_files_ro - mark all files read-only | ||
404 | * @sb: superblock in question | ||
405 | * | ||
406 | * All files are marked read-only. We don't care about files | ||
407 | * pending deletion, so this should be used in 'force' mode only. | ||
408 | */ | ||
409 | void mark_files_ro(struct super_block *sb) | ||
410 | { | ||
411 | struct file *f; | ||
412 | |||
413 | retry: | ||
414 | file_list_lock(); | ||
415 | list_for_each_entry(f, &sb->s_files, f_u.fu_list) { | ||
416 | struct vfsmount *mnt; | ||
417 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) | ||
418 | continue; | ||
419 | if (!file_count(f)) | ||
420 | continue; | ||
421 | if (!(f->f_mode & FMODE_WRITE)) | ||
422 | continue; | ||
423 | f->f_mode &= ~FMODE_WRITE; | ||
424 | if (file_check_writeable(f) != 0) | ||
425 | continue; | ||
426 | file_release_write(f); | ||
427 | mnt = mntget(f->f_path.mnt); | ||
428 | file_list_unlock(); | ||
429 | /* | ||
430 | * This can sleep, so we can't hold | ||
431 | * the file_list_lock() spinlock. | ||
432 | */ | ||
433 | mnt_drop_write(mnt); | ||
434 | mntput(mnt); | ||
435 | goto retry; | ||
436 | } | ||
437 | file_list_unlock(); | ||
438 | } | ||
439 | |||
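For context, a sketch of the intended call site: the forced-remount path, roughly as it reads in do_remount_sb() (the exact shape here is an assumption, not quoted from this series):

/* Sketch of the caller, not part of this hunk: on a forced
 * remount to read-only, existing writers are revoked instead
 * of making the remount fail. */
if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
	if (force)
		mark_files_ro(sb);
	else if (!fs_may_remount_ro(sb))
		return -EBUSY;
}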
402 | void __init files_init(unsigned long mempages) | 440 | void __init files_init(unsigned long mempages) |
403 | { | 441 | { |
404 | int n; | 442 | int n; |
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 1dacda831577..cdbd1654e4cd 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c | |||
@@ -80,12 +80,16 @@ vxfs_put_super(struct super_block *sbp) | |||
80 | { | 80 | { |
81 | struct vxfs_sb_info *infp = VXFS_SBI(sbp); | 81 | struct vxfs_sb_info *infp = VXFS_SBI(sbp); |
82 | 82 | ||
83 | lock_kernel(); | ||
84 | |||
83 | vxfs_put_fake_inode(infp->vsi_fship); | 85 | vxfs_put_fake_inode(infp->vsi_fship); |
84 | vxfs_put_fake_inode(infp->vsi_ilist); | 86 | vxfs_put_fake_inode(infp->vsi_ilist); |
85 | vxfs_put_fake_inode(infp->vsi_stilist); | 87 | vxfs_put_fake_inode(infp->vsi_stilist); |
86 | 88 | ||
87 | brelse(infp->vsi_bp); | 89 | brelse(infp->vsi_bp); |
88 | kfree(infp); | 90 | kfree(infp); |
91 | |||
92 | unlock_kernel(); | ||
89 | } | 93 | } |
90 | 94 | ||
91 | /** | 95 | /** |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 91013ff7dd53..40308e98c6a4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -64,6 +64,28 @@ static void writeback_release(struct backing_dev_info *bdi) | |||
64 | clear_bit(BDI_pdflush, &bdi->state); | 64 | clear_bit(BDI_pdflush, &bdi->state); |
65 | } | 65 | } |
66 | 66 | ||
67 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | ||
68 | { | ||
69 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | ||
70 | struct dentry *dentry; | ||
71 | const char *name = "?"; | ||
72 | |||
73 | dentry = d_find_alias(inode); | ||
74 | if (dentry) { | ||
75 | spin_lock(&dentry->d_lock); | ||
76 | name = (const char *) dentry->d_name.name; | ||
77 | } | ||
78 | printk(KERN_DEBUG | ||
79 | "%s(%d): dirtied inode %lu (%s) on %s\n", | ||
80 | current->comm, task_pid_nr(current), inode->i_ino, | ||
81 | name, inode->i_sb->s_id); | ||
82 | if (dentry) { | ||
83 | spin_unlock(&dentry->d_lock); | ||
84 | dput(dentry); | ||
85 | } | ||
86 | } | ||
87 | } | ||
88 | |||
67 | /** | 89 | /** |
68 | * __mark_inode_dirty - internal function | 90 | * __mark_inode_dirty - internal function |
69 | * @inode: inode to mark | 91 | * @inode: inode to mark |
@@ -114,23 +136,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
114 | if ((inode->i_state & flags) == flags) | 136 | if ((inode->i_state & flags) == flags) |
115 | return; | 137 | return; |
116 | 138 | ||
117 | if (unlikely(block_dump)) { | 139 | if (unlikely(block_dump)) |
118 | struct dentry *dentry = NULL; | 140 | block_dump___mark_inode_dirty(inode); |
119 | const char *name = "?"; | ||
120 | |||
121 | if (!list_empty(&inode->i_dentry)) { | ||
122 | dentry = list_entry(inode->i_dentry.next, | ||
123 | struct dentry, d_alias); | ||
124 | if (dentry && dentry->d_name.name) | ||
125 | name = (const char *) dentry->d_name.name; | ||
126 | } | ||
127 | |||
128 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) | ||
129 | printk(KERN_DEBUG | ||
130 | "%s(%d): dirtied inode %lu (%s) on %s\n", | ||
131 | current->comm, task_pid_nr(current), inode->i_ino, | ||
132 | name, inode->i_sb->s_id); | ||
133 | } | ||
134 | 141 | ||
135 | spin_lock(&inode_lock); | 142 | spin_lock(&inode_lock); |
136 | if ((inode->i_state & flags) != flags) { | 143 | if ((inode->i_state & flags) != flags) { |
@@ -289,7 +296,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
289 | int ret; | 296 | int ret; |
290 | 297 | ||
291 | BUG_ON(inode->i_state & I_SYNC); | 298 | BUG_ON(inode->i_state & I_SYNC); |
292 | WARN_ON(inode->i_state & I_NEW); | ||
293 | 299 | ||
294 | /* Set I_SYNC, reset I_DIRTY */ | 300 | /* Set I_SYNC, reset I_DIRTY */ |
295 | dirty = inode->i_state & I_DIRTY; | 301 | dirty = inode->i_state & I_DIRTY; |
@@ -314,7 +320,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
314 | } | 320 | } |
315 | 321 | ||
316 | spin_lock(&inode_lock); | 322 | spin_lock(&inode_lock); |
317 | WARN_ON(inode->i_state & I_NEW); | ||
318 | inode->i_state &= ~I_SYNC; | 323 | inode->i_state &= ~I_SYNC; |
319 | if (!(inode->i_state & I_FREEING)) { | 324 | if (!(inode->i_state & I_FREEING)) { |
320 | if (!(inode->i_state & I_DIRTY) && | 325 | if (!(inode->i_state & I_DIRTY) && |
@@ -679,55 +684,6 @@ void sync_inodes_sb(struct super_block *sb, int wait) | |||
679 | } | 684 | } |
680 | 685 | ||
681 | /** | 686 | /** |
682 | * sync_inodes - writes all inodes to disk | ||
683 | * @wait: wait for completion | ||
684 | * | ||
685 | * sync_inodes() goes through each super block's dirty inode list, writes the | ||
686 | * inodes out, waits on the writeout and puts the inodes back on the normal | ||
687 | * list. | ||
688 | * | ||
689 | * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle | ||
690 | * part of the sync functions is that the blockdev "superblock" is processed | ||
691 | * last. This is because the write_inode() function of a typical fs will | ||
692 | * perform no I/O, but will mark buffers in the blockdev mapping as dirty. | ||
693 | * What we want to do is to perform all that dirtying first, and then write | ||
694 | * back all those inode blocks via the blockdev mapping in one sweep. So the | ||
695 | * additional (somewhat redundant) sync_blockdev() calls here are to make | ||
696 | * sure that really happens. Because if we call sync_inodes_sb(wait=1) with | ||
697 | * outstanding dirty inodes, the writeback goes block-at-a-time within the | ||
698 | * filesystem's write_inode(). This is extremely slow. | ||
699 | */ | ||
700 | static void __sync_inodes(int wait) | ||
701 | { | ||
702 | struct super_block *sb; | ||
703 | |||
704 | spin_lock(&sb_lock); | ||
705 | restart: | ||
706 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
707 | sb->s_count++; | ||
708 | spin_unlock(&sb_lock); | ||
709 | down_read(&sb->s_umount); | ||
710 | if (sb->s_root) { | ||
711 | sync_inodes_sb(sb, wait); | ||
712 | sync_blockdev(sb->s_bdev); | ||
713 | } | ||
714 | up_read(&sb->s_umount); | ||
715 | spin_lock(&sb_lock); | ||
716 | if (__put_super_and_need_restart(sb)) | ||
717 | goto restart; | ||
718 | } | ||
719 | spin_unlock(&sb_lock); | ||
720 | } | ||
721 | |||
722 | void sync_inodes(int wait) | ||
723 | { | ||
724 | __sync_inodes(0); | ||
725 | |||
726 | if (wait) | ||
727 | __sync_inodes(1); | ||
728 | } | ||
729 | |||
730 | /** | ||
731 | * write_inode_now - write an inode to disk | 687 | * write_inode_now - write an inode to disk |
732 | * @inode: inode to write to disk | 688 | * @inode: inode to write to disk |
733 | * @sync: whether the write should be synchronous or not | 689 | * @sync: whether the write should be synchronous or not |
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 72437065f6ad..e95eeb445e58 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile | |||
@@ -3,5 +3,6 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_FUSE_FS) += fuse.o | 5 | obj-$(CONFIG_FUSE_FS) += fuse.o |
6 | obj-$(CONFIG_CUSE) += cuse.o | ||
6 | 7 | ||
7 | fuse-objs := dev.o dir.o file.o inode.o control.o | 8 | fuse-objs := dev.o dir.o file.o inode.o control.o |
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c new file mode 100644 index 000000000000..de792dcf3274 --- /dev/null +++ b/fs/fuse/cuse.c | |||
@@ -0,0 +1,610 @@ | |||
1 | /* | ||
2 | * CUSE: Character device in Userspace | ||
3 | * | ||
4 | * Copyright (C) 2008-2009 SUSE Linux Products GmbH | ||
5 | * Copyright (C) 2008-2009 Tejun Heo <tj@kernel.org> | ||
6 | * | ||
7 | * This file is released under the GPLv2. | ||
8 | * | ||
9 | * CUSE enables character devices to be implemented from userland much | ||
10 | * like FUSE allows filesystems. On initialization /dev/cuse is | ||
11 | * created. By opening the file and replying to the CUSE_INIT request, | ||
12 | * a userland CUSE server can create a character device. After that the | ||
13 | * operation is very similar to FUSE. | ||
14 | * | ||
15 | * A CUSE instance involves the following objects. | ||
16 | * | ||
17 | * cuse_conn : contains fuse_conn and serves as bonding structure | ||
18 | * channel : file handle connected to the userland CUSE server | ||
19 | * cdev : the implemented character device | ||
20 | * dev : generic device for cdev | ||
21 | * | ||
22 | * Note that 'channel' is what 'dev' is in FUSE. As CUSE deals with | ||
23 | * devices, it's called 'channel' to reduce confusion. | ||
24 | * | ||
25 | * The channel determines when the character device dies. When the | ||
26 | * channel is closed, teardown begins: the cuse_conn is taken off | ||
27 | * the lookup table (preventing further access from the cdev), the | ||
28 | * cdev and the generic device are removed, and the base reference | ||
29 | * of cuse_conn is put. | ||
30 | * | ||
31 | * On each open, the matching cuse_conn is looked up and, if found, | ||
32 | * an additional reference is taken; it is released when the file is | ||
33 | * closed. | ||
34 | */ | ||
35 | |||
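To make the handshake described in the comment above concrete, here is a minimal editorial sketch of the userland side: open /dev/cuse, read the CUSE_INIT request, and reply with a cuse_init_out followed by the packed DEVNAME info string. The struct layouts and CUSE_INIT come from <linux/fuse.h> as extended by this series; the device name, buffer size, and thin error handling are assumptions:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <linux/fuse.h>

int cuse_init_handshake(void)
{
	char buf[8192];
	struct fuse_in_header *in = (struct fuse_in_header *)buf;
	static const char devname_str[] = "DEVNAME=hello";	/* assumed name */
	struct {
		struct fuse_out_header h;
		struct cuse_init_out init;
		char info[sizeof(devname_str)];
	} __attribute__((packed)) reply = {
		.init = {
			.major		= FUSE_KERNEL_VERSION,
			.minor		= FUSE_KERNEL_MINOR_VERSION,
			.max_read	= 65536,
			.max_write	= 65536,
			.dev_major	= 0,	/* 0: kernel allocates the region */
		},
	};
	int fd = open("/dev/cuse", O_RDWR);

	if (fd < 0)
		return -1;
	/* the first request on a fresh channel is CUSE_INIT */
	if (read(fd, buf, sizeof(buf)) < (ssize_t)sizeof(*in) ||
	    in->opcode != CUSE_INIT) {
		close(fd);
		return -1;
	}
	memcpy(reply.info, devname_str, sizeof(devname_str));
	reply.h.unique = in->unique;		/* echo the request id */
	reply.h.len = sizeof(reply);
	if (write(fd, &reply, reply.h.len) != (ssize_t)reply.h.len) {
		close(fd);
		return -1;
	}
	/* the character device now exists; keep serving requests on fd */
	return fd;
}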
36 | #include <linux/fuse.h> | ||
37 | #include <linux/cdev.h> | ||
38 | #include <linux/device.h> | ||
39 | #include <linux/file.h> | ||
40 | #include <linux/fs.h> | ||
41 | #include <linux/kdev_t.h> | ||
42 | #include <linux/kthread.h> | ||
43 | #include <linux/list.h> | ||
44 | #include <linux/magic.h> | ||
45 | #include <linux/miscdevice.h> | ||
46 | #include <linux/mutex.h> | ||
47 | #include <linux/spinlock.h> | ||
48 | #include <linux/stat.h> | ||
49 | |||
50 | #include "fuse_i.h" | ||
51 | |||
52 | #define CUSE_CONNTBL_LEN 64 | ||
53 | |||
54 | struct cuse_conn { | ||
55 | struct list_head list; /* linked on cuse_conntbl */ | ||
56 | struct fuse_conn fc; /* fuse connection */ | ||
57 | struct cdev *cdev; /* associated character device */ | ||
58 | struct device *dev; /* device representing @cdev */ | ||
59 | |||
60 | /* init parameters, set once during initialization */ | ||
61 | bool unrestricted_ioctl; | ||
62 | }; | ||
63 | |||
64 | static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */ | ||
65 | static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN]; | ||
66 | static struct class *cuse_class; | ||
67 | |||
68 | static struct cuse_conn *fc_to_cc(struct fuse_conn *fc) | ||
69 | { | ||
70 | return container_of(fc, struct cuse_conn, fc); | ||
71 | } | ||
72 | |||
73 | static struct list_head *cuse_conntbl_head(dev_t devt) | ||
74 | { | ||
75 | return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN]; | ||
76 | } | ||
77 | |||
78 | |||
79 | /************************************************************************** | ||
80 | * CUSE frontend operations | ||
81 | * | ||
82 | * These are file operations for the character device. | ||
83 | * | ||
84 | * On open, CUSE opens a file from the FUSE mnt and stores it to | ||
85 | * private_data of the open file. All other ops call FUSE ops on the | ||
86 | * FUSE file. | ||
87 | */ | ||
88 | |||
89 | static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, | ||
90 | loff_t *ppos) | ||
91 | { | ||
92 | loff_t pos = 0; | ||
93 | |||
94 | return fuse_direct_io(file, buf, count, &pos, 0); | ||
95 | } | ||
96 | |||
97 | static ssize_t cuse_write(struct file *file, const char __user *buf, | ||
98 | size_t count, loff_t *ppos) | ||
99 | { | ||
100 | loff_t pos = 0; | ||
101 | /* | ||
102 | * No locking or generic_write_checks(); the server is | ||
103 | * responsible for locking and sanity checks. | ||
104 | */ | ||
105 | return fuse_direct_io(file, buf, count, &pos, 1); | ||
106 | } | ||
107 | |||
108 | static int cuse_open(struct inode *inode, struct file *file) | ||
109 | { | ||
110 | dev_t devt = inode->i_cdev->dev; | ||
111 | struct cuse_conn *cc = NULL, *pos; | ||
112 | int rc; | ||
113 | |||
114 | /* look up and get the connection */ | ||
115 | spin_lock(&cuse_lock); | ||
116 | list_for_each_entry(pos, cuse_conntbl_head(devt), list) | ||
117 | if (pos->dev->devt == devt) { | ||
118 | fuse_conn_get(&pos->fc); | ||
119 | cc = pos; | ||
120 | break; | ||
121 | } | ||
122 | spin_unlock(&cuse_lock); | ||
123 | |||
124 | /* dead? */ | ||
125 | if (!cc) | ||
126 | return -ENODEV; | ||
127 | |||
128 | /* | ||
129 | * Generic permission check is already done against the chrdev | ||
130 | * file, proceed to open. | ||
131 | */ | ||
132 | rc = fuse_do_open(&cc->fc, 0, file, 0); | ||
133 | if (rc) | ||
134 | fuse_conn_put(&cc->fc); | ||
135 | return rc; | ||
136 | } | ||
137 | |||
138 | static int cuse_release(struct inode *inode, struct file *file) | ||
139 | { | ||
140 | struct fuse_file *ff = file->private_data; | ||
141 | struct fuse_conn *fc = ff->fc; | ||
142 | |||
143 | fuse_sync_release(ff, file->f_flags); | ||
144 | fuse_conn_put(fc); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | static long cuse_file_ioctl(struct file *file, unsigned int cmd, | ||
150 | unsigned long arg) | ||
151 | { | ||
152 | struct fuse_file *ff = file->private_data; | ||
153 | struct cuse_conn *cc = fc_to_cc(ff->fc); | ||
154 | unsigned int flags = 0; | ||
155 | |||
156 | if (cc->unrestricted_ioctl) | ||
157 | flags |= FUSE_IOCTL_UNRESTRICTED; | ||
158 | |||
159 | return fuse_do_ioctl(file, cmd, arg, flags); | ||
160 | } | ||
161 | |||
162 | static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd, | ||
163 | unsigned long arg) | ||
164 | { | ||
165 | struct fuse_file *ff = file->private_data; | ||
166 | struct cuse_conn *cc = fc_to_cc(ff->fc); | ||
167 | unsigned int flags = FUSE_IOCTL_COMPAT; | ||
168 | |||
169 | if (cc->unrestricted_ioctl) | ||
170 | flags |= FUSE_IOCTL_UNRESTRICTED; | ||
171 | |||
172 | return fuse_do_ioctl(file, cmd, arg, flags); | ||
173 | } | ||
174 | |||
175 | static const struct file_operations cuse_frontend_fops = { | ||
176 | .owner = THIS_MODULE, | ||
177 | .read = cuse_read, | ||
178 | .write = cuse_write, | ||
179 | .open = cuse_open, | ||
180 | .release = cuse_release, | ||
181 | .unlocked_ioctl = cuse_file_ioctl, | ||
182 | .compat_ioctl = cuse_file_compat_ioctl, | ||
183 | .poll = fuse_file_poll, | ||
184 | }; | ||
185 | |||
186 | |||
187 | /************************************************************************** | ||
188 | * CUSE channel initialization and destruction | ||
189 | */ | ||
190 | |||
191 | struct cuse_devinfo { | ||
192 | const char *name; | ||
193 | }; | ||
194 | |||
195 | /** | ||
196 | * cuse_parse_one - parse one key=value pair | ||
197 | * @pp: i/o parameter for the current position | ||
198 | * @end: points to one past the end of the packed string | ||
199 | * @keyp: out parameter for key | ||
200 | * @valp: out parameter for value | ||
201 | * | ||
202 | * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends | ||
203 | * at @end - 1. This function parses one pair and sets *@keyp to the | ||
204 | * start of the key and *@valp to the start of the value. Note that | ||
205 | * the original string is modified such that the key string is | ||
206 | * terminated with '\0'. *@pp is updated to point to the next string. | ||
207 | * | ||
208 | * RETURNS: | ||
209 | * 1 on successful parse, 0 on EOF, -errno on failure. | ||
210 | */ | ||
211 | static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) | ||
212 | { | ||
213 | char *p = *pp; | ||
214 | char *key, *val; | ||
215 | |||
216 | while (p < end && *p == '\0') | ||
217 | p++; | ||
218 | if (p == end) | ||
219 | return 0; | ||
220 | |||
221 | if (end[-1] != '\0') { | ||
222 | printk(KERN_ERR "CUSE: info not properly terminated\n"); | ||
223 | return -EINVAL; | ||
224 | } | ||
225 | |||
226 | key = val = p; | ||
227 | p += strlen(p); | ||
228 | |||
229 | if (valp) { | ||
230 | strsep(&val, "="); | ||
231 | if (!val) | ||
232 | val = key + strlen(key); | ||
233 | key = strstrip(key); | ||
234 | val = strstrip(val); | ||
235 | } else | ||
236 | key = strstrip(key); | ||
237 | |||
238 | if (!strlen(key)) { | ||
239 | printk(KERN_ERR "CUSE: zero length info key specified\n"); | ||
240 | return -EINVAL; | ||
241 | } | ||
242 | |||
243 | *pp = p; | ||
244 | *keyp = key; | ||
245 | if (valp) | ||
246 | *valp = val; | ||
247 | |||
248 | return 1; | ||
249 | } | ||
250 | |||
251 | /** | ||
252 | * cuse_parse_dev_info - parse device info | ||
253 | * @p: device info string | ||
254 | * @len: length of device info string | ||
255 | * @devinfo: out parameter for parsed device info | ||
256 | * | ||
257 | * Parse @p to extract device info and store it into @devinfo. String | ||
258 | * pointed to by @p is modified by parsing and @devinfo points into | ||
259 | * it, so @p shouldn't be freed while @devinfo is in use. | ||
260 | * | ||
261 | * RETURNS: | ||
262 | * 0 on success, -errno on failure. | ||
263 | */ | ||
264 | static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) | ||
265 | { | ||
266 | char *end = p + len; | ||
267 | char *key, *val; | ||
268 | int rc; | ||
269 | |||
270 | while (true) { | ||
271 | rc = cuse_parse_one(&p, end, &key, &val); | ||
272 | if (rc < 0) | ||
273 | return rc; | ||
274 | if (!rc) | ||
275 | break; | ||
276 | if (strcmp(key, "DEVNAME") == 0) | ||
277 | devinfo->name = val; | ||
278 | else | ||
279 | printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n", | ||
280 | key); | ||
281 | } | ||
282 | |||
283 | if (!devinfo->name || !strlen(devinfo->name)) { | ||
284 | printk(KERN_ERR "CUSE: DEVNAME unspecified\n"); | ||
285 | return -EINVAL; | ||
286 | } | ||
287 | |||
288 | return 0; | ||
289 | } | ||
290 | |||
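A quick illustration of the packed format both parsers above accept, written as if it sat next to them in this file (the buffer contents are made up; the unknown key only provokes the warning printk):

/* Sketch: what cuse_parse_devinfo() consumes and produces. */
static int cuse_devinfo_demo(void)
{
	char info[] = "DEVNAME=hello\0MODE=0666";	/* '\0'-packed pairs */
	struct cuse_devinfo devinfo = { };
	int rc = cuse_parse_devinfo(info, sizeof(info), &devinfo);

	/* rc == 0; devinfo.name now points at "hello" inside info[] */
	return rc;
}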
291 | static void cuse_gendev_release(struct device *dev) | ||
292 | { | ||
293 | kfree(dev); | ||
294 | } | ||
295 | |||
296 | /** | ||
297 | * cuse_process_init_reply - finish initializing CUSE channel | ||
298 | * | ||
299 | * This function creates the character device and sets up all the | ||
300 | * required data structures for it. Please read the comment at the | ||
301 | * top of this file for high level overview. | ||
302 | */ | ||
303 | static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | ||
304 | { | ||
305 | struct cuse_conn *cc = fc_to_cc(fc); | ||
306 | struct cuse_init_out *arg = &req->misc.cuse_init_out; | ||
307 | struct page *page = req->pages[0]; | ||
308 | struct cuse_devinfo devinfo = { }; | ||
309 | struct device *dev; | ||
310 | struct cdev *cdev; | ||
311 | dev_t devt; | ||
312 | int rc; | ||
313 | |||
314 | if (req->out.h.error || | ||
315 | arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { | ||
316 | goto err; | ||
317 | } | ||
318 | |||
319 | fc->minor = arg->minor; | ||
320 | fc->max_read = max_t(unsigned, arg->max_read, 4096); | ||
321 | fc->max_write = max_t(unsigned, arg->max_write, 4096); | ||
322 | |||
323 | /* parse init reply */ | ||
324 | cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL; | ||
325 | |||
326 | rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size, | ||
327 | &devinfo); | ||
328 | if (rc) | ||
329 | goto err; | ||
330 | |||
331 | /* determine and reserve devt */ | ||
332 | devt = MKDEV(arg->dev_major, arg->dev_minor); | ||
333 | if (!MAJOR(devt)) | ||
334 | rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name); | ||
335 | else | ||
336 | rc = register_chrdev_region(devt, 1, devinfo.name); | ||
337 | if (rc) { | ||
338 | printk(KERN_ERR "CUSE: failed to register chrdev region\n"); | ||
339 | goto err; | ||
340 | } | ||
341 | |||
342 | /* devt determined, create device */ | ||
343 | rc = -ENOMEM; | ||
344 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | ||
345 | if (!dev) | ||
346 | goto err_region; | ||
347 | |||
348 | device_initialize(dev); | ||
349 | dev_set_uevent_suppress(dev, 1); | ||
350 | dev->class = cuse_class; | ||
351 | dev->devt = devt; | ||
352 | dev->release = cuse_gendev_release; | ||
353 | dev_set_drvdata(dev, cc); | ||
354 | dev_set_name(dev, "%s", devinfo.name); | ||
355 | |||
356 | rc = device_add(dev); | ||
357 | if (rc) | ||
358 | goto err_device; | ||
359 | |||
360 | /* register cdev */ | ||
361 | rc = -ENOMEM; | ||
362 | cdev = cdev_alloc(); | ||
363 | if (!cdev) | ||
364 | goto err_device; | ||
365 | |||
366 | cdev->owner = THIS_MODULE; | ||
367 | cdev->ops = &cuse_frontend_fops; | ||
368 | |||
369 | rc = cdev_add(cdev, devt, 1); | ||
370 | if (rc) | ||
371 | goto err_cdev; | ||
372 | |||
373 | cc->dev = dev; | ||
374 | cc->cdev = cdev; | ||
375 | |||
376 | /* make the device available */ | ||
377 | spin_lock(&cuse_lock); | ||
378 | list_add(&cc->list, cuse_conntbl_head(devt)); | ||
379 | spin_unlock(&cuse_lock); | ||
380 | |||
381 | /* announce device availability */ | ||
382 | dev_set_uevent_suppress(dev, 0); | ||
383 | kobject_uevent(&dev->kobj, KOBJ_ADD); | ||
384 | out: | ||
385 | __free_page(page); | ||
386 | return; | ||
387 | |||
388 | err_cdev: | ||
389 | cdev_del(cdev); | ||
390 | err_device: | ||
391 | put_device(dev); | ||
392 | err_region: | ||
393 | unregister_chrdev_region(devt, 1); | ||
394 | err: | ||
395 | fc->conn_error = 1; | ||
396 | goto out; | ||
397 | } | ||
398 | |||
399 | static int cuse_send_init(struct cuse_conn *cc) | ||
400 | { | ||
401 | int rc; | ||
402 | struct fuse_req *req; | ||
403 | struct page *page; | ||
404 | struct fuse_conn *fc = &cc->fc; | ||
405 | struct cuse_init_in *arg; | ||
406 | |||
407 | BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); | ||
408 | |||
409 | req = fuse_get_req(fc); | ||
410 | if (IS_ERR(req)) { | ||
411 | rc = PTR_ERR(req); | ||
412 | goto err; | ||
413 | } | ||
414 | |||
415 | rc = -ENOMEM; | ||
416 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
417 | if (!page) | ||
418 | goto err_put_req; | ||
419 | |||
420 | arg = &req->misc.cuse_init_in; | ||
421 | arg->major = FUSE_KERNEL_VERSION; | ||
422 | arg->minor = FUSE_KERNEL_MINOR_VERSION; | ||
423 | arg->flags |= CUSE_UNRESTRICTED_IOCTL; | ||
424 | req->in.h.opcode = CUSE_INIT; | ||
425 | req->in.numargs = 1; | ||
426 | req->in.args[0].size = sizeof(struct cuse_init_in); | ||
427 | req->in.args[0].value = arg; | ||
428 | req->out.numargs = 2; | ||
429 | req->out.args[0].size = sizeof(struct cuse_init_out); | ||
430 | req->out.args[0].value = &req->misc.cuse_init_out; | ||
431 | req->out.args[1].size = CUSE_INIT_INFO_MAX; | ||
432 | req->out.argvar = 1; | ||
433 | req->out.argpages = 1; | ||
434 | req->pages[0] = page; | ||
435 | req->num_pages = 1; | ||
436 | req->end = cuse_process_init_reply; | ||
437 | fuse_request_send_background(fc, req); | ||
438 | |||
439 | return 0; | ||
440 | |||
441 | err_put_req: | ||
442 | fuse_put_request(fc, req); | ||
443 | err: | ||
444 | return rc; | ||
445 | } | ||
446 | |||
447 | static void cuse_fc_release(struct fuse_conn *fc) | ||
448 | { | ||
449 | struct cuse_conn *cc = fc_to_cc(fc); | ||
450 | kfree(cc); | ||
451 | } | ||
452 | |||
453 | /** | ||
454 | * cuse_channel_open - open method for /dev/cuse | ||
455 | * @inode: inode for /dev/cuse | ||
456 | * @file: file struct being opened | ||
457 | * | ||
458 | * A userland CUSE server can create a CUSE device by opening /dev/cuse | ||
459 | * and replying to the initialization request the kernel sends. This | ||
460 | * function is responsible for handling CUSE device initialization. | ||
461 | * Because the fd opened by this function is used during | ||
462 | * initialization, this function only creates cuse_conn and sends | ||
463 | * init. The rest is done from the init-reply completion callback. | ||
464 | * | ||
465 | * RETURNS: | ||
466 | * 0 on success, -errno on failure. | ||
467 | */ | ||
468 | static int cuse_channel_open(struct inode *inode, struct file *file) | ||
469 | { | ||
470 | struct cuse_conn *cc; | ||
471 | int rc; | ||
472 | |||
473 | /* set up cuse_conn */ | ||
474 | cc = kzalloc(sizeof(*cc), GFP_KERNEL); | ||
475 | if (!cc) | ||
476 | return -ENOMEM; | ||
477 | |||
478 | fuse_conn_init(&cc->fc); | ||
479 | |||
480 | INIT_LIST_HEAD(&cc->list); | ||
481 | cc->fc.release = cuse_fc_release; | ||
482 | |||
483 | cc->fc.connected = 1; | ||
484 | cc->fc.blocked = 0; | ||
485 | rc = cuse_send_init(cc); | ||
486 | if (rc) { | ||
487 | fuse_conn_put(&cc->fc); | ||
488 | return rc; | ||
489 | } | ||
490 | file->private_data = &cc->fc; /* channel owns base reference to cc */ | ||
491 | |||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | /** | ||
496 | * cuse_channel_release - release method for /dev/cuse | ||
497 | * @inode: inode for /dev/cuse | ||
498 | * @file: file struct being closed | ||
499 | * | ||
500 | * Disconnect the channel, deregister the CUSE device and initiate | ||
501 | * destruction by putting the default reference. | ||
502 | * | ||
503 | * RETURNS: | ||
504 | * 0 on success, -errno on failure. | ||
505 | */ | ||
506 | static int cuse_channel_release(struct inode *inode, struct file *file) | ||
507 | { | ||
508 | struct cuse_conn *cc = fc_to_cc(file->private_data); | ||
509 | int rc; | ||
510 | |||
511 | /* remove from the conntbl, no more access from this point on */ | ||
512 | spin_lock(&cuse_lock); | ||
513 | list_del_init(&cc->list); | ||
514 | spin_unlock(&cuse_lock); | ||
515 | |||
516 | /* remove device */ | ||
517 | if (cc->dev) | ||
518 | device_unregister(cc->dev); | ||
519 | if (cc->cdev) { | ||
520 | unregister_chrdev_region(cc->cdev->dev, 1); | ||
521 | cdev_del(cc->cdev); | ||
522 | } | ||
523 | |||
524 | /* kill connection and shutdown channel */ | ||
525 | fuse_conn_kill(&cc->fc); | ||
526 | rc = fuse_dev_release(inode, file); /* puts the base reference */ | ||
527 | |||
528 | return rc; | ||
529 | } | ||
530 | |||
531 | static struct file_operations cuse_channel_fops; /* initialized during init */ | ||
532 | |||
533 | |||
534 | /************************************************************************** | ||
535 | * Misc stuff and module initialization | ||
536 | * | ||
537 | * CUSE exports the same set of attributes to sysfs as fusectl. | ||
538 | */ | ||
539 | |||
540 | static ssize_t cuse_class_waiting_show(struct device *dev, | ||
541 | struct device_attribute *attr, char *buf) | ||
542 | { | ||
543 | struct cuse_conn *cc = dev_get_drvdata(dev); | ||
544 | |||
545 | return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); | ||
546 | } | ||
547 | |||
548 | static ssize_t cuse_class_abort_store(struct device *dev, | ||
549 | struct device_attribute *attr, | ||
550 | const char *buf, size_t count) | ||
551 | { | ||
552 | struct cuse_conn *cc = dev_get_drvdata(dev); | ||
553 | |||
554 | fuse_abort_conn(&cc->fc); | ||
555 | return count; | ||
556 | } | ||
557 | |||
558 | static struct device_attribute cuse_class_dev_attrs[] = { | ||
559 | __ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL), | ||
560 | __ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store), | ||
561 | { } | ||
562 | }; | ||
563 | |||
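With the class set up below, every CUSE device exposes 'waiting' (0400) and 'abort' (0200) under sysfs, mirroring fusectl. A hedged userspace sketch; the device name, and therefore the path, are assumptions:

/* Sketch: any write to the 'abort' attribute calls
 * fuse_abort_conn(); the written bytes are ignored. */
#include <fcntl.h>
#include <unistd.h>

int cuse_abort_example(void)
{
	int fd = open("/sys/class/cuse/hello/abort", O_WRONLY);

	if (fd < 0)
		return -1;
	(void)write(fd, "1", 1);
	return close(fd);
}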
564 | static struct miscdevice cuse_miscdev = { | ||
565 | .minor = MISC_DYNAMIC_MINOR, | ||
566 | .name = "cuse", | ||
567 | .fops = &cuse_channel_fops, | ||
568 | }; | ||
569 | |||
570 | static int __init cuse_init(void) | ||
571 | { | ||
572 | int i, rc; | ||
573 | |||
574 | /* init conntbl */ | ||
575 | for (i = 0; i < CUSE_CONNTBL_LEN; i++) | ||
576 | INIT_LIST_HEAD(&cuse_conntbl[i]); | ||
577 | |||
578 | /* inherit and extend fuse_dev_operations */ | ||
579 | cuse_channel_fops = fuse_dev_operations; | ||
580 | cuse_channel_fops.owner = THIS_MODULE; | ||
581 | cuse_channel_fops.open = cuse_channel_open; | ||
582 | cuse_channel_fops.release = cuse_channel_release; | ||
583 | |||
584 | cuse_class = class_create(THIS_MODULE, "cuse"); | ||
585 | if (IS_ERR(cuse_class)) | ||
586 | return PTR_ERR(cuse_class); | ||
587 | |||
588 | cuse_class->dev_attrs = cuse_class_dev_attrs; | ||
589 | |||
590 | rc = misc_register(&cuse_miscdev); | ||
591 | if (rc) { | ||
592 | class_destroy(cuse_class); | ||
593 | return rc; | ||
594 | } | ||
595 | |||
596 | return 0; | ||
597 | } | ||
598 | |||
599 | static void __exit cuse_exit(void) | ||
600 | { | ||
601 | misc_deregister(&cuse_miscdev); | ||
602 | class_destroy(cuse_class); | ||
603 | } | ||
604 | |||
605 | module_init(cuse_init); | ||
606 | module_exit(cuse_exit); | ||
607 | |||
608 | MODULE_AUTHOR("Tejun Heo <tj@kernel.org>"); | ||
609 | MODULE_DESCRIPTION("Character device in Userspace"); | ||
610 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ba76b68c52ff..8fed2ed12f38 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void) | |||
46 | fuse_request_init(req); | 46 | fuse_request_init(req); |
47 | return req; | 47 | return req; |
48 | } | 48 | } |
49 | EXPORT_SYMBOL_GPL(fuse_request_alloc); | ||
49 | 50 | ||
50 | struct fuse_req *fuse_request_alloc_nofs(void) | 51 | struct fuse_req *fuse_request_alloc_nofs(void) |
51 | { | 52 | { |
@@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) | |||
124 | atomic_dec(&fc->num_waiting); | 125 | atomic_dec(&fc->num_waiting); |
125 | return ERR_PTR(err); | 126 | return ERR_PTR(err); |
126 | } | 127 | } |
128 | EXPORT_SYMBOL_GPL(fuse_get_req); | ||
127 | 129 | ||
128 | /* | 130 | /* |
129 | * Return request in fuse_file->reserved_req. However that may | 131 | * Return request in fuse_file->reserved_req. However that may |
@@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) | |||
208 | fuse_request_free(req); | 210 | fuse_request_free(req); |
209 | } | 211 | } |
210 | } | 212 | } |
213 | EXPORT_SYMBOL_GPL(fuse_put_request); | ||
211 | 214 | ||
212 | static unsigned len_args(unsigned numargs, struct fuse_arg *args) | 215 | static unsigned len_args(unsigned numargs, struct fuse_arg *args) |
213 | { | 216 | { |
@@ -282,7 +285,7 @@ __releases(&fc->lock) | |||
282 | wake_up_all(&fc->blocked_waitq); | 285 | wake_up_all(&fc->blocked_waitq); |
283 | } | 286 | } |
284 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && | 287 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && |
285 | fc->connected) { | 288 | fc->connected && fc->bdi_initialized) { |
286 | clear_bdi_congested(&fc->bdi, READ); | 289 | clear_bdi_congested(&fc->bdi, READ); |
287 | clear_bdi_congested(&fc->bdi, WRITE); | 290 | clear_bdi_congested(&fc->bdi, WRITE); |
288 | } | 291 | } |
@@ -400,6 +403,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) | |||
400 | } | 403 | } |
401 | spin_unlock(&fc->lock); | 404 | spin_unlock(&fc->lock); |
402 | } | 405 | } |
406 | EXPORT_SYMBOL_GPL(fuse_request_send); | ||
403 | 407 | ||
404 | static void fuse_request_send_nowait_locked(struct fuse_conn *fc, | 408 | static void fuse_request_send_nowait_locked(struct fuse_conn *fc, |
405 | struct fuse_req *req) | 409 | struct fuse_req *req) |
@@ -408,7 +412,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, | |||
408 | fc->num_background++; | 412 | fc->num_background++; |
409 | if (fc->num_background == FUSE_MAX_BACKGROUND) | 413 | if (fc->num_background == FUSE_MAX_BACKGROUND) |
410 | fc->blocked = 1; | 414 | fc->blocked = 1; |
411 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { | 415 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && |
416 | fc->bdi_initialized) { | ||
412 | set_bdi_congested(&fc->bdi, READ); | 417 | set_bdi_congested(&fc->bdi, READ); |
413 | set_bdi_congested(&fc->bdi, WRITE); | 418 | set_bdi_congested(&fc->bdi, WRITE); |
414 | } | 419 | } |
@@ -439,6 +444,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) | |||
439 | req->isreply = 1; | 444 | req->isreply = 1; |
440 | fuse_request_send_nowait(fc, req); | 445 | fuse_request_send_nowait(fc, req); |
441 | } | 446 | } |
447 | EXPORT_SYMBOL_GPL(fuse_request_send_background); | ||
442 | 448 | ||
443 | /* | 449 | /* |
444 | * Called under fc->lock | 450 | * Called under fc->lock |
@@ -1105,8 +1111,9 @@ void fuse_abort_conn(struct fuse_conn *fc) | |||
1105 | } | 1111 | } |
1106 | spin_unlock(&fc->lock); | 1112 | spin_unlock(&fc->lock); |
1107 | } | 1113 | } |
1114 | EXPORT_SYMBOL_GPL(fuse_abort_conn); | ||
1108 | 1115 | ||
1109 | static int fuse_dev_release(struct inode *inode, struct file *file) | 1116 | int fuse_dev_release(struct inode *inode, struct file *file) |
1110 | { | 1117 | { |
1111 | struct fuse_conn *fc = fuse_get_conn(file); | 1118 | struct fuse_conn *fc = fuse_get_conn(file); |
1112 | if (fc) { | 1119 | if (fc) { |
@@ -1120,6 +1127,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file) | |||
1120 | 1127 | ||
1121 | return 0; | 1128 | return 0; |
1122 | } | 1129 | } |
1130 | EXPORT_SYMBOL_GPL(fuse_dev_release); | ||
1123 | 1131 | ||
1124 | static int fuse_dev_fasync(int fd, struct file *file, int on) | 1132 | static int fuse_dev_fasync(int fd, struct file *file, int on) |
1125 | { | 1133 | { |
@@ -1142,6 +1150,7 @@ const struct file_operations fuse_dev_operations = { | |||
1142 | .release = fuse_dev_release, | 1150 | .release = fuse_dev_release, |
1143 | .fasync = fuse_dev_fasync, | 1151 | .fasync = fuse_dev_fasync, |
1144 | }; | 1152 | }; |
1153 | EXPORT_SYMBOL_GPL(fuse_dev_operations); | ||
1145 | 1154 | ||
1146 | static struct miscdevice fuse_miscdevice = { | 1155 | static struct miscdevice fuse_miscdevice = { |
1147 | .minor = FUSE_MINOR, | 1156 | .minor = FUSE_MINOR, |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8b8eebc5614b..b3089a083d30 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -362,19 +362,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, | |||
362 | } | 362 | } |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * Synchronous release for the case when something goes wrong in CREATE_OPEN | ||
366 | */ | ||
367 | static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff, | ||
368 | u64 nodeid, int flags) | ||
369 | { | ||
370 | fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE); | ||
371 | ff->reserved_req->force = 1; | ||
372 | fuse_request_send(fc, ff->reserved_req); | ||
373 | fuse_put_request(fc, ff->reserved_req); | ||
374 | kfree(ff); | ||
375 | } | ||
376 | |||
377 | /* | ||
378 | * Atomic create+open operation | 365 | * Atomic create+open operation |
379 | * | 366 | * |
380 | * If the filesystem doesn't support this, then fall back to separate | 367 | * If the filesystem doesn't support this, then fall back to separate |
@@ -445,12 +432,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, | |||
445 | goto out_free_ff; | 432 | goto out_free_ff; |
446 | 433 | ||
447 | fuse_put_request(fc, req); | 434 | fuse_put_request(fc, req); |
435 | ff->fh = outopen.fh; | ||
436 | ff->nodeid = outentry.nodeid; | ||
437 | ff->open_flags = outopen.open_flags; | ||
448 | inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, | 438 | inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, |
449 | &outentry.attr, entry_attr_timeout(&outentry), 0); | 439 | &outentry.attr, entry_attr_timeout(&outentry), 0); |
450 | if (!inode) { | 440 | if (!inode) { |
451 | flags &= ~(O_CREAT | O_EXCL | O_TRUNC); | 441 | flags &= ~(O_CREAT | O_EXCL | O_TRUNC); |
452 | ff->fh = outopen.fh; | 442 | fuse_sync_release(ff, flags); |
453 | fuse_sync_release(fc, ff, outentry.nodeid, flags); | ||
454 | fuse_send_forget(fc, forget_req, outentry.nodeid, 1); | 443 | fuse_send_forget(fc, forget_req, outentry.nodeid, 1); |
455 | return -ENOMEM; | 444 | return -ENOMEM; |
456 | } | 445 | } |
@@ -460,11 +449,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, | |||
460 | fuse_invalidate_attr(dir); | 449 | fuse_invalidate_attr(dir); |
461 | file = lookup_instantiate_filp(nd, entry, generic_file_open); | 450 | file = lookup_instantiate_filp(nd, entry, generic_file_open); |
462 | if (IS_ERR(file)) { | 451 | if (IS_ERR(file)) { |
463 | ff->fh = outopen.fh; | 452 | fuse_sync_release(ff, flags); |
464 | fuse_sync_release(fc, ff, outentry.nodeid, flags); | ||
465 | return PTR_ERR(file); | 453 | return PTR_ERR(file); |
466 | } | 454 | } |
467 | fuse_finish_open(inode, file, ff, &outopen); | 455 | file->private_data = fuse_file_get(ff); |
456 | fuse_finish_open(inode, file); | ||
468 | return 0; | 457 | return 0; |
469 | 458 | ||
470 | out_free_ff: | 459 | out_free_ff: |
@@ -1035,7 +1024,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | |||
1035 | req->out.argpages = 1; | 1024 | req->out.argpages = 1; |
1036 | req->num_pages = 1; | 1025 | req->num_pages = 1; |
1037 | req->pages[0] = page; | 1026 | req->pages[0] = page; |
1038 | fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); | 1027 | fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); |
1039 | fuse_request_send(fc, req); | 1028 | fuse_request_send(fc, req); |
1040 | nbytes = req->out.args[0].size; | 1029 | nbytes = req->out.args[0].size; |
1041 | err = req->out.h.error; | 1030 | err = req->out.h.error; |
@@ -1101,12 +1090,14 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c) | |||
1101 | 1090 | ||
1102 | static int fuse_dir_open(struct inode *inode, struct file *file) | 1091 | static int fuse_dir_open(struct inode *inode, struct file *file) |
1103 | { | 1092 | { |
1104 | return fuse_open_common(inode, file, 1); | 1093 | return fuse_open_common(inode, file, true); |
1105 | } | 1094 | } |
1106 | 1095 | ||
1107 | static int fuse_dir_release(struct inode *inode, struct file *file) | 1096 | static int fuse_dir_release(struct inode *inode, struct file *file) |
1108 | { | 1097 | { |
1109 | return fuse_release_common(inode, file, 1); | 1098 | fuse_release_common(file, FUSE_RELEASEDIR); |
1099 | |||
1100 | return 0; | ||
1110 | } | 1101 | } |
1111 | 1102 | ||
1112 | static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) | 1103 | static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06f30e965676..fce6ce694fde 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -12,13 +12,13 @@ | |||
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/module.h> | ||
15 | 16 | ||
16 | static const struct file_operations fuse_direct_io_file_operations; | 17 | static const struct file_operations fuse_direct_io_file_operations; |
17 | 18 | ||
18 | static int fuse_send_open(struct inode *inode, struct file *file, int isdir, | 19 | static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
19 | struct fuse_open_out *outargp) | 20 | int opcode, struct fuse_open_out *outargp) |
20 | { | 21 | { |
21 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
22 | struct fuse_open_in inarg; | 22 | struct fuse_open_in inarg; |
23 | struct fuse_req *req; | 23 | struct fuse_req *req; |
24 | int err; | 24 | int err; |
@@ -31,8 +31,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, | |||
31 | inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); | 31 | inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); |
32 | if (!fc->atomic_o_trunc) | 32 | if (!fc->atomic_o_trunc) |
33 | inarg.flags &= ~O_TRUNC; | 33 | inarg.flags &= ~O_TRUNC; |
34 | req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; | 34 | req->in.h.opcode = opcode; |
35 | req->in.h.nodeid = get_node_id(inode); | 35 | req->in.h.nodeid = nodeid; |
36 | req->in.numargs = 1; | 36 | req->in.numargs = 1; |
37 | req->in.args[0].size = sizeof(inarg); | 37 | req->in.args[0].size = sizeof(inarg); |
38 | req->in.args[0].value = &inarg; | 38 | req->in.args[0].value = &inarg; |
@@ -49,22 +49,27 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, | |||
49 | struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) | 49 | struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) |
50 | { | 50 | { |
51 | struct fuse_file *ff; | 51 | struct fuse_file *ff; |
52 | |||
52 | ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); | 53 | ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); |
53 | if (ff) { | 54 | if (unlikely(!ff)) |
54 | ff->reserved_req = fuse_request_alloc(); | 55 | return NULL; |
55 | if (!ff->reserved_req) { | 56 | |
56 | kfree(ff); | 57 | ff->fc = fc; |
57 | return NULL; | 58 | ff->reserved_req = fuse_request_alloc(); |
58 | } else { | 59 | if (unlikely(!ff->reserved_req)) { |
59 | INIT_LIST_HEAD(&ff->write_entry); | 60 | kfree(ff); |
60 | atomic_set(&ff->count, 0); | 61 | return NULL; |
61 | spin_lock(&fc->lock); | ||
62 | ff->kh = ++fc->khctr; | ||
63 | spin_unlock(&fc->lock); | ||
64 | } | ||
65 | RB_CLEAR_NODE(&ff->polled_node); | ||
66 | init_waitqueue_head(&ff->poll_wait); | ||
67 | } | 62 | } |
63 | |||
64 | INIT_LIST_HEAD(&ff->write_entry); | ||
65 | atomic_set(&ff->count, 0); | ||
66 | RB_CLEAR_NODE(&ff->polled_node); | ||
67 | init_waitqueue_head(&ff->poll_wait); | ||
68 | |||
69 | spin_lock(&fc->lock); | ||
70 | ff->kh = ++fc->khctr; | ||
71 | spin_unlock(&fc->lock); | ||
72 | |||
68 | return ff; | 73 | return ff; |
69 | } | 74 | } |
70 | 75 | ||
@@ -74,7 +79,7 @@ void fuse_file_free(struct fuse_file *ff) | |||
74 | kfree(ff); | 79 | kfree(ff); |
75 | } | 80 | } |
76 | 81 | ||
77 | static struct fuse_file *fuse_file_get(struct fuse_file *ff) | 82 | struct fuse_file *fuse_file_get(struct fuse_file *ff) |
78 | { | 83 | { |
79 | atomic_inc(&ff->count); | 84 | atomic_inc(&ff->count); |
80 | return ff; | 85 | return ff; |
@@ -82,40 +87,65 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) | |||
82 | 87 | ||
83 | static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) | 88 | static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) |
84 | { | 89 | { |
85 | dput(req->misc.release.dentry); | 90 | path_put(&req->misc.release.path); |
86 | mntput(req->misc.release.vfsmount); | ||
87 | } | 91 | } |
88 | 92 | ||
89 | static void fuse_file_put(struct fuse_file *ff) | 93 | static void fuse_file_put(struct fuse_file *ff) |
90 | { | 94 | { |
91 | if (atomic_dec_and_test(&ff->count)) { | 95 | if (atomic_dec_and_test(&ff->count)) { |
92 | struct fuse_req *req = ff->reserved_req; | 96 | struct fuse_req *req = ff->reserved_req; |
93 | struct inode *inode = req->misc.release.dentry->d_inode; | 97 | |
94 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
95 | req->end = fuse_release_end; | 98 | req->end = fuse_release_end; |
96 | fuse_request_send_background(fc, req); | 99 | fuse_request_send_background(ff->fc, req); |
97 | kfree(ff); | 100 | kfree(ff); |
98 | } | 101 | } |
99 | } | 102 | } |
100 | 103 | ||
101 | void fuse_finish_open(struct inode *inode, struct file *file, | 104 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
102 | struct fuse_file *ff, struct fuse_open_out *outarg) | 105 | bool isdir) |
103 | { | 106 | { |
104 | if (outarg->open_flags & FOPEN_DIRECT_IO) | 107 | struct fuse_open_out outarg; |
108 | struct fuse_file *ff; | ||
109 | int err; | ||
110 | int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; | ||
111 | |||
112 | ff = fuse_file_alloc(fc); | ||
113 | if (!ff) | ||
114 | return -ENOMEM; | ||
115 | |||
116 | err = fuse_send_open(fc, nodeid, file, opcode, &outarg); | ||
117 | if (err) { | ||
118 | fuse_file_free(ff); | ||
119 | return err; | ||
120 | } | ||
121 | |||
122 | if (isdir) | ||
123 | outarg.open_flags &= ~FOPEN_DIRECT_IO; | ||
124 | |||
125 | ff->fh = outarg.fh; | ||
126 | ff->nodeid = nodeid; | ||
127 | ff->open_flags = outarg.open_flags; | ||
128 | file->private_data = fuse_file_get(ff); | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | EXPORT_SYMBOL_GPL(fuse_do_open); | ||
133 | |||
134 | void fuse_finish_open(struct inode *inode, struct file *file) | ||
135 | { | ||
136 | struct fuse_file *ff = file->private_data; | ||
137 | |||
138 | if (ff->open_flags & FOPEN_DIRECT_IO) | ||
105 | file->f_op = &fuse_direct_io_file_operations; | 139 | file->f_op = &fuse_direct_io_file_operations; |
106 | if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) | 140 | if (!(ff->open_flags & FOPEN_KEEP_CACHE)) |
107 | invalidate_inode_pages2(inode->i_mapping); | 141 | invalidate_inode_pages2(inode->i_mapping); |
108 | if (outarg->open_flags & FOPEN_NONSEEKABLE) | 142 | if (ff->open_flags & FOPEN_NONSEEKABLE) |
109 | nonseekable_open(inode, file); | 143 | nonseekable_open(inode, file); |
110 | ff->fh = outarg->fh; | ||
111 | file->private_data = fuse_file_get(ff); | ||
112 | } | 144 | } |
113 | 145 | ||
114 | int fuse_open_common(struct inode *inode, struct file *file, int isdir) | 146 | int fuse_open_common(struct inode *inode, struct file *file, bool isdir) |
115 | { | 147 | { |
116 | struct fuse_conn *fc = get_fuse_conn(inode); | 148 | struct fuse_conn *fc = get_fuse_conn(inode); |
117 | struct fuse_open_out outarg; | ||
118 | struct fuse_file *ff; | ||
119 | int err; | 149 | int err; |
120 | 150 | ||
121 | /* VFS checks this, but only _after_ ->open() */ | 151 | /* VFS checks this, but only _after_ ->open() */ |
@@ -126,78 +156,85 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) | |||
126 | if (err) | 156 | if (err) |
127 | return err; | 157 | return err; |
128 | 158 | ||
129 | ff = fuse_file_alloc(fc); | 159 | err = fuse_do_open(fc, get_node_id(inode), file, isdir); |
130 | if (!ff) | ||
131 | return -ENOMEM; | ||
132 | |||
133 | err = fuse_send_open(inode, file, isdir, &outarg); | ||
134 | if (err) | 160 | if (err) |
135 | fuse_file_free(ff); | 161 | return err; |
136 | else { | ||
137 | if (isdir) | ||
138 | outarg.open_flags &= ~FOPEN_DIRECT_IO; | ||
139 | fuse_finish_open(inode, file, ff, &outarg); | ||
140 | } | ||
141 | 162 | ||
142 | return err; | 163 | fuse_finish_open(inode, file); |
164 | |||
165 | return 0; | ||
143 | } | 166 | } |
144 | 167 | ||
145 | void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) | 168 | static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) |
146 | { | 169 | { |
170 | struct fuse_conn *fc = ff->fc; | ||
147 | struct fuse_req *req = ff->reserved_req; | 171 | struct fuse_req *req = ff->reserved_req; |
148 | struct fuse_release_in *inarg = &req->misc.release.in; | 172 | struct fuse_release_in *inarg = &req->misc.release.in; |
149 | 173 | ||
174 | spin_lock(&fc->lock); | ||
175 | list_del(&ff->write_entry); | ||
176 | if (!RB_EMPTY_NODE(&ff->polled_node)) | ||
177 | rb_erase(&ff->polled_node, &fc->polled_files); | ||
178 | spin_unlock(&fc->lock); | ||
179 | |||
180 | wake_up_interruptible_sync(&ff->poll_wait); | ||
181 | |||
150 | inarg->fh = ff->fh; | 182 | inarg->fh = ff->fh; |
151 | inarg->flags = flags; | 183 | inarg->flags = flags; |
152 | req->in.h.opcode = opcode; | 184 | req->in.h.opcode = opcode; |
153 | req->in.h.nodeid = nodeid; | 185 | req->in.h.nodeid = ff->nodeid; |
154 | req->in.numargs = 1; | 186 | req->in.numargs = 1; |
155 | req->in.args[0].size = sizeof(struct fuse_release_in); | 187 | req->in.args[0].size = sizeof(struct fuse_release_in); |
156 | req->in.args[0].value = inarg; | 188 | req->in.args[0].value = inarg; |
157 | } | 189 | } |
158 | 190 | ||
159 | int fuse_release_common(struct inode *inode, struct file *file, int isdir) | 191 | void fuse_release_common(struct file *file, int opcode) |
160 | { | 192 | { |
161 | struct fuse_file *ff = file->private_data; | 193 | struct fuse_file *ff; |
162 | if (ff) { | 194 | struct fuse_req *req; |
163 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
164 | struct fuse_req *req = ff->reserved_req; | ||
165 | |||
166 | fuse_release_fill(ff, get_node_id(inode), file->f_flags, | ||
167 | isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); | ||
168 | 195 | ||
169 | /* Hold vfsmount and dentry until release is finished */ | 196 | ff = file->private_data; |
170 | req->misc.release.vfsmount = mntget(file->f_path.mnt); | 197 | if (unlikely(!ff)) |
171 | req->misc.release.dentry = dget(file->f_path.dentry); | 198 | return; |
172 | 199 | ||
173 | spin_lock(&fc->lock); | 200 | req = ff->reserved_req; |
174 | list_del(&ff->write_entry); | 201 | fuse_prepare_release(ff, file->f_flags, opcode); |
175 | if (!RB_EMPTY_NODE(&ff->polled_node)) | ||
176 | rb_erase(&ff->polled_node, &fc->polled_files); | ||
177 | spin_unlock(&fc->lock); | ||
178 | 202 | ||
179 | wake_up_interruptible_sync(&ff->poll_wait); | 203 | /* Hold vfsmount and dentry until release is finished */ |
180 | /* | 204 | path_get(&file->f_path); |
181 | * Normally this will send the RELEASE request, | 205 | req->misc.release.path = file->f_path; |
182 | * however if some asynchronous READ or WRITE requests | ||
183 | * are outstanding, the sending will be delayed | ||
184 | */ | ||
185 | fuse_file_put(ff); | ||
186 | } | ||
187 | 206 | ||
188 | /* Return value is ignored by VFS */ | 207 | /* |
189 | return 0; | 208 | * Normally this will send the RELEASE request, however if |
209 | * some asynchronous READ or WRITE requests are outstanding, | ||
210 | * the sending will be delayed. | ||
211 | */ | ||
212 | fuse_file_put(ff); | ||
190 | } | 213 | } |
191 | 214 | ||
192 | static int fuse_open(struct inode *inode, struct file *file) | 215 | static int fuse_open(struct inode *inode, struct file *file) |
193 | { | 216 | { |
194 | return fuse_open_common(inode, file, 0); | 217 | return fuse_open_common(inode, file, false); |
195 | } | 218 | } |
196 | 219 | ||
197 | static int fuse_release(struct inode *inode, struct file *file) | 220 | static int fuse_release(struct inode *inode, struct file *file) |
198 | { | 221 | { |
199 | return fuse_release_common(inode, file, 0); | 222 | fuse_release_common(file, FUSE_RELEASE); |
223 | |||
224 | /* return value is ignored by VFS */ | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | void fuse_sync_release(struct fuse_file *ff, int flags) | ||
229 | { | ||
230 | WARN_ON(atomic_read(&ff->count) > 1); | ||
231 | fuse_prepare_release(ff, flags, FUSE_RELEASE); | ||
232 | ff->reserved_req->force = 1; | ||
233 | fuse_request_send(ff->fc, ff->reserved_req); | ||
234 | fuse_put_request(ff->fc, ff->reserved_req); | ||
235 | kfree(ff); | ||
200 | } | 236 | } |
237 | EXPORT_SYMBOL_GPL(fuse_sync_release); | ||
201 | 238 | ||
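
The release path above defers the actual RELEASE request: fuse_prepare_release() only builds it, and fuse_file_put() sends it once the last reference drops, so outstanding asynchronous reads and writes naturally delay the send. A minimal standalone sketch of that refcount-deferral pattern; names and types are illustrative, not the kernel's:

#include <stdatomic.h>
#include <stdlib.h>

struct handle {
	atomic_int count;                  /* opener + each in-flight request */
	void (*send_release)(struct handle *h);
};

static void handle_get(struct handle *h)
{
	atomic_fetch_add(&h->count, 1);    /* taken per asynchronous request */
}

static void handle_put(struct handle *h)
{
	/* Only the last reference sends RELEASE; while async I/O still
	 * holds references, the send stays deferred. */
	if (atomic_fetch_sub(&h->count, 1) == 1) {
		h->send_release(h);
		free(h);
	}
}
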
202 | /* | 239 | /* |
203 | * Scramble the ID space with XTEA, so that the value of the files_struct | 240 | * Scramble the ID space with XTEA, so that the value of the files_struct |
@@ -371,8 +408,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync) | |||
371 | return fuse_fsync_common(file, de, datasync, 0); | 408 | return fuse_fsync_common(file, de, datasync, 0); |
372 | } | 409 | } |
373 | 410 | ||
374 | void fuse_read_fill(struct fuse_req *req, struct file *file, | 411 | void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, |
375 | struct inode *inode, loff_t pos, size_t count, int opcode) | 412 | size_t count, int opcode) |
376 | { | 413 | { |
377 | struct fuse_read_in *inarg = &req->misc.read.in; | 414 | struct fuse_read_in *inarg = &req->misc.read.in; |
378 | struct fuse_file *ff = file->private_data; | 415 | struct fuse_file *ff = file->private_data; |
@@ -382,7 +419,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, | |||
382 | inarg->size = count; | 419 | inarg->size = count; |
383 | inarg->flags = file->f_flags; | 420 | inarg->flags = file->f_flags; |
384 | req->in.h.opcode = opcode; | 421 | req->in.h.opcode = opcode; |
385 | req->in.h.nodeid = get_node_id(inode); | 422 | req->in.h.nodeid = ff->nodeid; |
386 | req->in.numargs = 1; | 423 | req->in.numargs = 1; |
387 | req->in.args[0].size = sizeof(struct fuse_read_in); | 424 | req->in.args[0].size = sizeof(struct fuse_read_in); |
388 | req->in.args[0].value = inarg; | 425 | req->in.args[0].value = inarg; |
@@ -392,12 +429,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, | |||
392 | } | 429 | } |
393 | 430 | ||
394 | static size_t fuse_send_read(struct fuse_req *req, struct file *file, | 431 | static size_t fuse_send_read(struct fuse_req *req, struct file *file, |
395 | struct inode *inode, loff_t pos, size_t count, | 432 | loff_t pos, size_t count, fl_owner_t owner) |
396 | fl_owner_t owner) | ||
397 | { | 433 | { |
398 | struct fuse_conn *fc = get_fuse_conn(inode); | 434 | struct fuse_file *ff = file->private_data; |
435 | struct fuse_conn *fc = ff->fc; | ||
399 | 436 | ||
400 | fuse_read_fill(req, file, inode, pos, count, FUSE_READ); | 437 | fuse_read_fill(req, file, pos, count, FUSE_READ); |
401 | if (owner != NULL) { | 438 | if (owner != NULL) { |
402 | struct fuse_read_in *inarg = &req->misc.read.in; | 439 | struct fuse_read_in *inarg = &req->misc.read.in; |
403 | 440 | ||
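
fuse_read_fill() and fuse_send_read() now pull the connection and node id from the fuse_file rather than taking an inode, which is what later lets CUSE drive reads on a bare character device. A sketch of the resulting request construction; the struct shapes are simplified stand-ins, and the opcode value is the FUSE wire constant:

#include <stdint.h>
#include <stddef.h>

struct req_hdr   { uint32_t opcode; uint64_t nodeid; };
struct read_in   { uint64_t fh, offset; uint32_t size; };
struct open_file { uint64_t fh, nodeid; };     /* mirrors fuse_file */

enum { OP_READ = 15 };                         /* FUSE_READ wire opcode */

static void read_fill(struct req_hdr *hdr, struct read_in *in,
		      const struct open_file *ff, uint64_t pos, size_t count)
{
	in->fh      = ff->fh;
	in->offset  = pos;
	in->size    = (uint32_t)count;
	hdr->opcode = OP_READ;
	hdr->nodeid = ff->nodeid;              /* no inode dereference */
}
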
@@ -455,7 +492,7 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
455 | req->out.argpages = 1; | 492 | req->out.argpages = 1; |
456 | req->num_pages = 1; | 493 | req->num_pages = 1; |
457 | req->pages[0] = page; | 494 | req->pages[0] = page; |
458 | num_read = fuse_send_read(req, file, inode, pos, count, NULL); | 495 | num_read = fuse_send_read(req, file, pos, count, NULL); |
459 | err = req->out.h.error; | 496 | err = req->out.h.error; |
460 | fuse_put_request(fc, req); | 497 | fuse_put_request(fc, req); |
461 | 498 | ||
@@ -504,19 +541,18 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) | |||
504 | fuse_file_put(req->ff); | 541 | fuse_file_put(req->ff); |
505 | } | 542 | } |
506 | 543 | ||
507 | static void fuse_send_readpages(struct fuse_req *req, struct file *file, | 544 | static void fuse_send_readpages(struct fuse_req *req, struct file *file) |
508 | struct inode *inode) | ||
509 | { | 545 | { |
510 | struct fuse_conn *fc = get_fuse_conn(inode); | 546 | struct fuse_file *ff = file->private_data; |
547 | struct fuse_conn *fc = ff->fc; | ||
511 | loff_t pos = page_offset(req->pages[0]); | 548 | loff_t pos = page_offset(req->pages[0]); |
512 | size_t count = req->num_pages << PAGE_CACHE_SHIFT; | 549 | size_t count = req->num_pages << PAGE_CACHE_SHIFT; |
513 | 550 | ||
514 | req->out.argpages = 1; | 551 | req->out.argpages = 1; |
515 | req->out.page_zeroing = 1; | 552 | req->out.page_zeroing = 1; |
516 | fuse_read_fill(req, file, inode, pos, count, FUSE_READ); | 553 | fuse_read_fill(req, file, pos, count, FUSE_READ); |
517 | req->misc.read.attr_ver = fuse_get_attr_version(fc); | 554 | req->misc.read.attr_ver = fuse_get_attr_version(fc); |
518 | if (fc->async_read) { | 555 | if (fc->async_read) { |
519 | struct fuse_file *ff = file->private_data; | ||
520 | req->ff = fuse_file_get(ff); | 556 | req->ff = fuse_file_get(ff); |
521 | req->end = fuse_readpages_end; | 557 | req->end = fuse_readpages_end; |
522 | fuse_request_send_background(fc, req); | 558 | fuse_request_send_background(fc, req); |
@@ -546,7 +582,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) | |||
546 | (req->num_pages == FUSE_MAX_PAGES_PER_REQ || | 582 | (req->num_pages == FUSE_MAX_PAGES_PER_REQ || |
547 | (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || | 583 | (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || |
548 | req->pages[req->num_pages - 1]->index + 1 != page->index)) { | 584 | req->pages[req->num_pages - 1]->index + 1 != page->index)) { |
549 | fuse_send_readpages(req, data->file, inode); | 585 | fuse_send_readpages(req, data->file); |
550 | data->req = req = fuse_get_req(fc); | 586 | data->req = req = fuse_get_req(fc); |
551 | if (IS_ERR(req)) { | 587 | if (IS_ERR(req)) { |
552 | unlock_page(page); | 588 | unlock_page(page); |
@@ -580,7 +616,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, | |||
580 | err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); | 616 | err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); |
581 | if (!err) { | 617 | if (!err) { |
582 | if (data.req->num_pages) | 618 | if (data.req->num_pages) |
583 | fuse_send_readpages(data.req, file, inode); | 619 | fuse_send_readpages(data.req, file); |
584 | else | 620 | else |
585 | fuse_put_request(fc, data.req); | 621 | fuse_put_request(fc, data.req); |
586 | } | 622 | } |
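
fuse_readpages_fill() batches file-contiguous pages into one request and flushes when a limit trips; with the inode parameter gone, fuse_send_readpages() finds the connection through the fuse_file. The flush condition from the hunk above, restated as a standalone predicate (field names are illustrative):

#include <stddef.h>

struct batch {
	unsigned num_pages;                    /* pages already queued */
	unsigned long last_index;              /* file index of the last one */
};

static int must_flush(const struct batch *b, unsigned long next_index,
		      size_t page_size, size_t max_read, unsigned max_pages)
{
	if (b->num_pages == 0)
		return 0;                      /* nothing queued yet */
	return b->num_pages == max_pages ||    /* request is full */
	       (b->num_pages + 1) * page_size > max_read ||
	       b->last_index + 1 != next_index; /* hole: not contiguous */
}
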
@@ -607,24 +643,19 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
607 | return generic_file_aio_read(iocb, iov, nr_segs, pos); | 643 | return generic_file_aio_read(iocb, iov, nr_segs, pos); |
608 | } | 644 | } |
609 | 645 | ||
610 | static void fuse_write_fill(struct fuse_req *req, struct file *file, | 646 | static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, |
611 | struct fuse_file *ff, struct inode *inode, | 647 | loff_t pos, size_t count) |
612 | loff_t pos, size_t count, int writepage) | ||
613 | { | 648 | { |
614 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
615 | struct fuse_write_in *inarg = &req->misc.write.in; | 649 | struct fuse_write_in *inarg = &req->misc.write.in; |
616 | struct fuse_write_out *outarg = &req->misc.write.out; | 650 | struct fuse_write_out *outarg = &req->misc.write.out; |
617 | 651 | ||
618 | memset(inarg, 0, sizeof(struct fuse_write_in)); | ||
619 | inarg->fh = ff->fh; | 652 | inarg->fh = ff->fh; |
620 | inarg->offset = pos; | 653 | inarg->offset = pos; |
621 | inarg->size = count; | 654 | inarg->size = count; |
622 | inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; | ||
623 | inarg->flags = file ? file->f_flags : 0; | ||
624 | req->in.h.opcode = FUSE_WRITE; | 655 | req->in.h.opcode = FUSE_WRITE; |
625 | req->in.h.nodeid = get_node_id(inode); | 656 | req->in.h.nodeid = ff->nodeid; |
626 | req->in.numargs = 2; | 657 | req->in.numargs = 2; |
627 | if (fc->minor < 9) | 658 | if (ff->fc->minor < 9) |
628 | req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; | 659 | req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; |
629 | else | 660 | else |
630 | req->in.args[0].size = sizeof(struct fuse_write_in); | 661 | req->in.args[0].size = sizeof(struct fuse_write_in); |
@@ -636,13 +667,15 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file, | |||
636 | } | 667 | } |
637 | 668 | ||
638 | static size_t fuse_send_write(struct fuse_req *req, struct file *file, | 669 | static size_t fuse_send_write(struct fuse_req *req, struct file *file, |
639 | struct inode *inode, loff_t pos, size_t count, | 670 | loff_t pos, size_t count, fl_owner_t owner) |
640 | fl_owner_t owner) | ||
641 | { | 671 | { |
642 | struct fuse_conn *fc = get_fuse_conn(inode); | 672 | struct fuse_file *ff = file->private_data; |
643 | fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); | 673 | struct fuse_conn *fc = ff->fc; |
674 | struct fuse_write_in *inarg = &req->misc.write.in; | ||
675 | |||
676 | fuse_write_fill(req, ff, pos, count); | ||
677 | inarg->flags = file->f_flags; | ||
644 | if (owner != NULL) { | 678 | if (owner != NULL) { |
645 | struct fuse_write_in *inarg = &req->misc.write.in; | ||
646 | inarg->write_flags |= FUSE_WRITE_LOCKOWNER; | 679 | inarg->write_flags |= FUSE_WRITE_LOCKOWNER; |
647 | inarg->lock_owner = fuse_lock_owner_id(fc, owner); | 680 | inarg->lock_owner = fuse_lock_owner_id(fc, owner); |
648 | } | 681 | } |
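
fuse_write_fill() shrinks to pure request construction from the fuse_file: the memset, the FUSE_WRITE_CACHE flag and the open flags all move to the callers, so fuse_send_write() sets inarg->flags from f_flags while the writepage path ORs in FUSE_WRITE_CACHE itself. A sketch of that division of labour, with simplified stand-in types:

#include <stdint.h>
#include <stddef.h>

struct write_in  { uint64_t fh, offset; uint32_t size, write_flags, flags; };
struct open_file { uint64_t fh, nodeid; };

/* Core: knows only the open handle, position and size. */
static void write_fill(struct write_in *in, const struct open_file *ff,
		       uint64_t pos, size_t count)
{
	in->fh     = ff->fh;
	in->offset = pos;
	in->size   = (uint32_t)count;
}

/* User-initiated write:  write_fill(...); in->flags = file_flags;       */
/* Writeback (writepage): write_fill(...); in->write_flags |= WR_CACHE;  */
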
@@ -700,7 +733,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, | |||
700 | req->num_pages = 1; | 733 | req->num_pages = 1; |
701 | req->pages[0] = page; | 734 | req->pages[0] = page; |
702 | req->page_offset = offset; | 735 | req->page_offset = offset; |
703 | nres = fuse_send_write(req, file, inode, pos, count, NULL); | 736 | nres = fuse_send_write(req, file, pos, count, NULL); |
704 | err = req->out.h.error; | 737 | err = req->out.h.error; |
705 | fuse_put_request(fc, req); | 738 | fuse_put_request(fc, req); |
706 | if (!err && !nres) | 739 | if (!err && !nres) |
@@ -741,7 +774,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, | |||
741 | for (i = 0; i < req->num_pages; i++) | 774 | for (i = 0; i < req->num_pages; i++) |
742 | fuse_wait_on_page_writeback(inode, req->pages[i]->index); | 775 | fuse_wait_on_page_writeback(inode, req->pages[i]->index); |
743 | 776 | ||
744 | res = fuse_send_write(req, file, inode, pos, count, NULL); | 777 | res = fuse_send_write(req, file, pos, count, NULL); |
745 | 778 | ||
746 | offset = req->page_offset; | 779 | offset = req->page_offset; |
747 | count = res; | 780 | count = res; |
@@ -979,25 +1012,23 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, | |||
979 | return 0; | 1012 | return 0; |
980 | } | 1013 | } |
981 | 1014 | ||
982 | static ssize_t fuse_direct_io(struct file *file, const char __user *buf, | 1015 | ssize_t fuse_direct_io(struct file *file, const char __user *buf, |
983 | size_t count, loff_t *ppos, int write) | 1016 | size_t count, loff_t *ppos, int write) |
984 | { | 1017 | { |
985 | struct inode *inode = file->f_path.dentry->d_inode; | 1018 | struct fuse_file *ff = file->private_data; |
986 | struct fuse_conn *fc = get_fuse_conn(inode); | 1019 | struct fuse_conn *fc = ff->fc; |
987 | size_t nmax = write ? fc->max_write : fc->max_read; | 1020 | size_t nmax = write ? fc->max_write : fc->max_read; |
988 | loff_t pos = *ppos; | 1021 | loff_t pos = *ppos; |
989 | ssize_t res = 0; | 1022 | ssize_t res = 0; |
990 | struct fuse_req *req; | 1023 | struct fuse_req *req; |
991 | 1024 | ||
992 | if (is_bad_inode(inode)) | ||
993 | return -EIO; | ||
994 | |||
995 | req = fuse_get_req(fc); | 1025 | req = fuse_get_req(fc); |
996 | if (IS_ERR(req)) | 1026 | if (IS_ERR(req)) |
997 | return PTR_ERR(req); | 1027 | return PTR_ERR(req); |
998 | 1028 | ||
999 | while (count) { | 1029 | while (count) { |
1000 | size_t nres; | 1030 | size_t nres; |
1031 | fl_owner_t owner = current->files; | ||
1001 | size_t nbytes = min(count, nmax); | 1032 | size_t nbytes = min(count, nmax); |
1002 | int err = fuse_get_user_pages(req, buf, &nbytes, write); | 1033 | int err = fuse_get_user_pages(req, buf, &nbytes, write); |
1003 | if (err) { | 1034 | if (err) { |
@@ -1006,11 +1037,10 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, | |||
1006 | } | 1037 | } |
1007 | 1038 | ||
1008 | if (write) | 1039 | if (write) |
1009 | nres = fuse_send_write(req, file, inode, pos, nbytes, | 1040 | nres = fuse_send_write(req, file, pos, nbytes, owner); |
1010 | current->files); | ||
1011 | else | 1041 | else |
1012 | nres = fuse_send_read(req, file, inode, pos, nbytes, | 1042 | nres = fuse_send_read(req, file, pos, nbytes, owner); |
1013 | current->files); | 1043 | |
1014 | fuse_release_user_pages(req, !write); | 1044 | fuse_release_user_pages(req, !write); |
1015 | if (req->out.h.error) { | 1045 | if (req->out.h.error) { |
1016 | if (!res) | 1046 | if (!res) |
@@ -1034,20 +1064,27 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, | |||
1034 | } | 1064 | } |
1035 | } | 1065 | } |
1036 | fuse_put_request(fc, req); | 1066 | fuse_put_request(fc, req); |
1037 | if (res > 0) { | 1067 | if (res > 0) |
1038 | if (write) | ||
1039 | fuse_write_update_size(inode, pos); | ||
1040 | *ppos = pos; | 1068 | *ppos = pos; |
1041 | } | ||
1042 | fuse_invalidate_attr(inode); | ||
1043 | 1069 | ||
1044 | return res; | 1070 | return res; |
1045 | } | 1071 | } |
1072 | EXPORT_SYMBOL_GPL(fuse_direct_io); | ||
1046 | 1073 | ||
1047 | static ssize_t fuse_direct_read(struct file *file, char __user *buf, | 1074 | static ssize_t fuse_direct_read(struct file *file, char __user *buf, |
1048 | size_t count, loff_t *ppos) | 1075 | size_t count, loff_t *ppos) |
1049 | { | 1076 | { |
1050 | return fuse_direct_io(file, buf, count, ppos, 0); | 1077 | ssize_t res; |
1078 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1079 | |||
1080 | if (is_bad_inode(inode)) | ||
1081 | return -EIO; | ||
1082 | |||
1083 | res = fuse_direct_io(file, buf, count, ppos, 0); | ||
1084 | |||
1085 | fuse_invalidate_attr(inode); | ||
1086 | |||
1087 | return res; | ||
1051 | } | 1088 | } |
1052 | 1089 | ||
1053 | static ssize_t fuse_direct_write(struct file *file, const char __user *buf, | 1090 | static ssize_t fuse_direct_write(struct file *file, const char __user *buf, |
@@ -1055,12 +1092,22 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, | |||
1055 | { | 1092 | { |
1056 | struct inode *inode = file->f_path.dentry->d_inode; | 1093 | struct inode *inode = file->f_path.dentry->d_inode; |
1057 | ssize_t res; | 1094 | ssize_t res; |
1095 | |||
1096 | if (is_bad_inode(inode)) | ||
1097 | return -EIO; | ||
1098 | |||
1058 | /* Don't allow parallel writes to the same file */ | 1099 | /* Don't allow parallel writes to the same file */ |
1059 | mutex_lock(&inode->i_mutex); | 1100 | mutex_lock(&inode->i_mutex); |
1060 | res = generic_write_checks(file, ppos, &count, 0); | 1101 | res = generic_write_checks(file, ppos, &count, 0); |
1061 | if (!res) | 1102 | if (!res) { |
1062 | res = fuse_direct_io(file, buf, count, ppos, 1); | 1103 | res = fuse_direct_io(file, buf, count, ppos, 1); |
1104 | if (res > 0) | ||
1105 | fuse_write_update_size(inode, *ppos); | ||
1106 | } | ||
1063 | mutex_unlock(&inode->i_mutex); | 1107 | mutex_unlock(&inode->i_mutex); |
1108 | |||
1109 | fuse_invalidate_attr(inode); | ||
1110 | |||
1064 | return res; | 1111 | return res; |
1065 | } | 1112 | } |
1066 | 1113 | ||
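
fuse_direct_io() itself becomes inode-free: the is_bad_inode() guard, fuse_invalidate_attr() and the size update all migrate into the file-backed wrappers above, which is what makes the core exportable to CUSE, where no inode exists. The layering, sketched with hypothetical helper names:

#include <stddef.h>
#include <sys/types.h>                 /* ssize_t, off_t */

extern ssize_t core_direct_io(void *file, char *buf, size_t n,
			      off_t *pos, int write);   /* inode-free core */
extern void *inode_of(void *file);                      /* hypothetical */
extern int   inode_is_bad(void *inode);
extern void  invalidate_attrs(void *inode);

static ssize_t direct_read(void *file, char *buf, size_t n, off_t *pos)
{
	void *inode = inode_of(file);

	if (inode_is_bad(inode))       /* inode policy stays in the wrapper */
		return -5;             /* -EIO */

	ssize_t res = core_direct_io(file, buf, n, pos, 0);

	invalidate_attrs(inode);       /* cached attrs may now be stale */
	return res;
}
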
@@ -1177,9 +1224,10 @@ static int fuse_writepage_locked(struct page *page) | |||
1177 | req->ff = fuse_file_get(ff); | 1224 | req->ff = fuse_file_get(ff); |
1178 | spin_unlock(&fc->lock); | 1225 | spin_unlock(&fc->lock); |
1179 | 1226 | ||
1180 | fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); | 1227 | fuse_write_fill(req, ff, page_offset(page), 0); |
1181 | 1228 | ||
1182 | copy_highpage(tmp_page, page); | 1229 | copy_highpage(tmp_page, page); |
1230 | req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; | ||
1183 | req->in.argpages = 1; | 1231 | req->in.argpages = 1; |
1184 | req->num_pages = 1; | 1232 | req->num_pages = 1; |
1185 | req->pages[0] = tmp_page; | 1233 | req->pages[0] = tmp_page; |
@@ -1603,12 +1651,11 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, | |||
1603 | * limits ioctl data transfers to well-formed ioctls and is the forced | 1651 | * limits ioctl data transfers to well-formed ioctls and is the forced |
1604 | * behavior for all FUSE servers. | 1652 | * behavior for all FUSE servers. |
1605 | */ | 1653 | */ |
1606 | static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, | 1654 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, |
1607 | unsigned long arg, unsigned int flags) | 1655 | unsigned int flags) |
1608 | { | 1656 | { |
1609 | struct inode *inode = file->f_dentry->d_inode; | ||
1610 | struct fuse_file *ff = file->private_data; | 1657 | struct fuse_file *ff = file->private_data; |
1611 | struct fuse_conn *fc = get_fuse_conn(inode); | 1658 | struct fuse_conn *fc = ff->fc; |
1612 | struct fuse_ioctl_in inarg = { | 1659 | struct fuse_ioctl_in inarg = { |
1613 | .fh = ff->fh, | 1660 | .fh = ff->fh, |
1614 | .cmd = cmd, | 1661 | .cmd = cmd, |
@@ -1627,13 +1674,6 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, | |||
1627 | /* assume all the iovs returned by the client always fit in a page */ | 1674 | /* assume all the iovs returned by the client always fit in a page */ |
1628 | BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); | 1675 | BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); |
1629 | 1676 | ||
1630 | if (!fuse_allow_task(fc, current)) | ||
1631 | return -EACCES; | ||
1632 | |||
1633 | err = -EIO; | ||
1634 | if (is_bad_inode(inode)) | ||
1635 | goto out; | ||
1636 | |||
1637 | err = -ENOMEM; | 1677 | err = -ENOMEM; |
1638 | pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); | 1678 | pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); |
1639 | iov_page = alloc_page(GFP_KERNEL); | 1679 | iov_page = alloc_page(GFP_KERNEL); |
@@ -1694,7 +1734,7 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, | |||
1694 | 1734 | ||
1695 | /* okay, let's send it to the client */ | 1735 | /* okay, let's send it to the client */ |
1696 | req->in.h.opcode = FUSE_IOCTL; | 1736 | req->in.h.opcode = FUSE_IOCTL; |
1697 | req->in.h.nodeid = get_node_id(inode); | 1737 | req->in.h.nodeid = ff->nodeid; |
1698 | req->in.numargs = 1; | 1738 | req->in.numargs = 1; |
1699 | req->in.args[0].size = sizeof(inarg); | 1739 | req->in.args[0].size = sizeof(inarg); |
1700 | req->in.args[0].value = &inarg; | 1740 | req->in.args[0].value = &inarg; |
@@ -1777,17 +1817,33 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, | |||
1777 | 1817 | ||
1778 | return err ? err : outarg.result; | 1818 | return err ? err : outarg.result; |
1779 | } | 1819 | } |
1820 | EXPORT_SYMBOL_GPL(fuse_do_ioctl); | ||
1821 | |||
1822 | static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, | ||
1823 | unsigned long arg, unsigned int flags) | ||
1824 | { | ||
1825 | struct inode *inode = file->f_dentry->d_inode; | ||
1826 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
1827 | |||
1828 | if (!fuse_allow_task(fc, current)) | ||
1829 | return -EACCES; | ||
1830 | |||
1831 | if (is_bad_inode(inode)) | ||
1832 | return -EIO; | ||
1833 | |||
1834 | return fuse_do_ioctl(file, cmd, arg, flags); | ||
1835 | } | ||
1780 | 1836 | ||
1781 | static long fuse_file_ioctl(struct file *file, unsigned int cmd, | 1837 | static long fuse_file_ioctl(struct file *file, unsigned int cmd, |
1782 | unsigned long arg) | 1838 | unsigned long arg) |
1783 | { | 1839 | { |
1784 | return fuse_file_do_ioctl(file, cmd, arg, 0); | 1840 | return fuse_file_ioctl_common(file, cmd, arg, 0); |
1785 | } | 1841 | } |
1786 | 1842 | ||
1787 | static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, | 1843 | static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, |
1788 | unsigned long arg) | 1844 | unsigned long arg) |
1789 | { | 1845 | { |
1790 | return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT); | 1846 | return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); |
1791 | } | 1847 | } |
1792 | 1848 | ||
1793 | /* | 1849 | /* |
@@ -1841,11 +1897,10 @@ static void fuse_register_polled_file(struct fuse_conn *fc, | |||
1841 | spin_unlock(&fc->lock); | 1897 | spin_unlock(&fc->lock); |
1842 | } | 1898 | } |
1843 | 1899 | ||
1844 | static unsigned fuse_file_poll(struct file *file, poll_table *wait) | 1900 | unsigned fuse_file_poll(struct file *file, poll_table *wait) |
1845 | { | 1901 | { |
1846 | struct inode *inode = file->f_dentry->d_inode; | ||
1847 | struct fuse_file *ff = file->private_data; | 1902 | struct fuse_file *ff = file->private_data; |
1848 | struct fuse_conn *fc = get_fuse_conn(inode); | 1903 | struct fuse_conn *fc = ff->fc; |
1849 | struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; | 1904 | struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; |
1850 | struct fuse_poll_out outarg; | 1905 | struct fuse_poll_out outarg; |
1851 | struct fuse_req *req; | 1906 | struct fuse_req *req; |
@@ -1870,7 +1925,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait) | |||
1870 | return PTR_ERR(req); | 1925 | return PTR_ERR(req); |
1871 | 1926 | ||
1872 | req->in.h.opcode = FUSE_POLL; | 1927 | req->in.h.opcode = FUSE_POLL; |
1873 | req->in.h.nodeid = get_node_id(inode); | 1928 | req->in.h.nodeid = ff->nodeid; |
1874 | req->in.numargs = 1; | 1929 | req->in.numargs = 1; |
1875 | req->in.args[0].size = sizeof(inarg); | 1930 | req->in.args[0].size = sizeof(inarg); |
1876 | req->in.args[0].value = &inarg; | 1931 | req->in.args[0].value = &inarg; |
@@ -1889,6 +1944,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait) | |||
1889 | } | 1944 | } |
1890 | return POLLERR; | 1945 | return POLLERR; |
1891 | } | 1946 | } |
1947 | EXPORT_SYMBOL_GPL(fuse_file_poll); | ||
1892 | 1948 | ||
1893 | /* | 1949 | /* |
1894 | * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and | 1950 | * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6fc5aedaa0d5..aaf2f9ff970e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -97,8 +97,13 @@ struct fuse_inode { | |||
97 | struct list_head writepages; | 97 | struct list_head writepages; |
98 | }; | 98 | }; |
99 | 99 | ||
100 | struct fuse_conn; | ||
101 | |||
100 | /** FUSE specific file data */ | 102 | /** FUSE specific file data */ |
101 | struct fuse_file { | 103 | struct fuse_file { |
104 | /** Fuse connection for this file */ | ||
105 | struct fuse_conn *fc; | ||
106 | |||
102 | /** Request reserved for flush and release */ | 107 | /** Request reserved for flush and release */ |
103 | struct fuse_req *reserved_req; | 108 | struct fuse_req *reserved_req; |
104 | 109 | ||
@@ -108,9 +113,15 @@ struct fuse_file { | |||
108 | /** File handle used by userspace */ | 113 | /** File handle used by userspace */ |
109 | u64 fh; | 114 | u64 fh; |
110 | 115 | ||
116 | /** Node id of this file */ | ||
117 | u64 nodeid; | ||
118 | |||
111 | /** Refcount */ | 119 | /** Refcount */ |
112 | atomic_t count; | 120 | atomic_t count; |
113 | 121 | ||
122 | /** FOPEN_* flags returned by open */ | ||
123 | u32 open_flags; | ||
124 | |||
114 | /** Entry on inode's write_files list */ | 125 | /** Entry on inode's write_files list */ |
115 | struct list_head write_entry; | 126 | struct list_head write_entry; |
116 | 127 | ||
@@ -185,8 +196,6 @@ enum fuse_req_state { | |||
185 | FUSE_REQ_FINISHED | 196 | FUSE_REQ_FINISHED |
186 | }; | 197 | }; |
187 | 198 | ||
188 | struct fuse_conn; | ||
189 | |||
190 | /** | 199 | /** |
191 | * A request to the client | 200 | * A request to the client |
192 | */ | 201 | */ |
@@ -248,11 +257,12 @@ struct fuse_req { | |||
248 | struct fuse_forget_in forget_in; | 257 | struct fuse_forget_in forget_in; |
249 | struct { | 258 | struct { |
250 | struct fuse_release_in in; | 259 | struct fuse_release_in in; |
251 | struct vfsmount *vfsmount; | 260 | struct path path; |
252 | struct dentry *dentry; | ||
253 | } release; | 261 | } release; |
254 | struct fuse_init_in init_in; | 262 | struct fuse_init_in init_in; |
255 | struct fuse_init_out init_out; | 263 | struct fuse_init_out init_out; |
264 | struct cuse_init_in cuse_init_in; | ||
265 | struct cuse_init_out cuse_init_out; | ||
256 | struct { | 266 | struct { |
257 | struct fuse_read_in in; | 267 | struct fuse_read_in in; |
258 | u64 attr_ver; | 268 | u64 attr_ver; |
@@ -386,6 +396,9 @@ struct fuse_conn { | |||
386 | /** Filesystem supports NFS exporting. Only set in INIT */ | 396 | /** Filesystem supports NFS exporting. Only set in INIT */ |
387 | unsigned export_support:1; | 397 | unsigned export_support:1; |
388 | 398 | ||
399 | /** Set if bdi is valid */ | ||
400 | unsigned bdi_initialized:1; | ||
401 | |||
389 | /* | 402 | /* |
390 | * The following bitfields are only for optimization purposes | 403 | * The following bitfields are only for optimization purposes |
391 | * and hence races in setting them will not cause malfunction | 404 | * and hence races in setting them will not cause malfunction |
@@ -515,25 +528,24 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, | |||
515 | * Initialize READ or READDIR request | 528 | * Initialize READ or READDIR request |
516 | */ | 529 | */ |
517 | void fuse_read_fill(struct fuse_req *req, struct file *file, | 530 | void fuse_read_fill(struct fuse_req *req, struct file *file, |
518 | struct inode *inode, loff_t pos, size_t count, int opcode); | 531 | loff_t pos, size_t count, int opcode); |
519 | 532 | ||
520 | /** | 533 | /** |
521 | * Send OPEN or OPENDIR request | 534 | * Send OPEN or OPENDIR request |
522 | */ | 535 | */ |
523 | int fuse_open_common(struct inode *inode, struct file *file, int isdir); | 536 | int fuse_open_common(struct inode *inode, struct file *file, bool isdir); |
524 | 537 | ||
525 | struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); | 538 | struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); |
539 | struct fuse_file *fuse_file_get(struct fuse_file *ff); | ||
526 | void fuse_file_free(struct fuse_file *ff); | 540 | void fuse_file_free(struct fuse_file *ff); |
527 | void fuse_finish_open(struct inode *inode, struct file *file, | 541 | void fuse_finish_open(struct inode *inode, struct file *file); |
528 | struct fuse_file *ff, struct fuse_open_out *outarg); | ||
529 | 542 | ||
530 | /** Fill in ff->reserved_req with a RELEASE request */ | 543 | void fuse_sync_release(struct fuse_file *ff, int flags); |
531 | void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode); | ||
532 | 544 | ||
533 | /** | 545 | /** |
534 | * Send RELEASE or RELEASEDIR request | 546 | * Send RELEASE or RELEASEDIR request |
535 | */ | 547 | */ |
536 | int fuse_release_common(struct inode *inode, struct file *file, int isdir); | 548 | void fuse_release_common(struct file *file, int opcode); |
537 | 549 | ||
538 | /** | 550 | /** |
539 | * Send FSYNC or FSYNCDIR request | 551 | * Send FSYNC or FSYNCDIR request |
@@ -652,10 +664,12 @@ void fuse_invalidate_entry_cache(struct dentry *entry); | |||
652 | */ | 664 | */ |
653 | struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); | 665 | struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); |
654 | 666 | ||
667 | void fuse_conn_kill(struct fuse_conn *fc); | ||
668 | |||
655 | /** | 669 | /** |
656 | * Initialize fuse_conn | 670 | * Initialize fuse_conn |
657 | */ | 671 | */ |
658 | int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb); | 672 | void fuse_conn_init(struct fuse_conn *fc); |
659 | 673 | ||
660 | /** | 674 | /** |
661 | * Release reference to fuse_conn | 675 | * Release reference to fuse_conn |
@@ -694,4 +708,13 @@ void fuse_release_nowrite(struct inode *inode); | |||
694 | 708 | ||
695 | u64 fuse_get_attr_version(struct fuse_conn *fc); | 709 | u64 fuse_get_attr_version(struct fuse_conn *fc); |
696 | 710 | ||
711 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | ||
712 | bool isdir); | ||
713 | ssize_t fuse_direct_io(struct file *file, const char __user *buf, | ||
714 | size_t count, loff_t *ppos, int write); | ||
715 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | ||
716 | unsigned int flags); | ||
717 | unsigned fuse_file_poll(struct file *file, poll_table *wait); | ||
718 | int fuse_dev_release(struct inode *inode, struct file *file); | ||
719 | |||
697 | #endif /* _FS_FUSE_I_H */ | 720 | #endif /* _FS_FUSE_I_H */ |
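
These header changes are the backbone of the series: struct fuse_file gains fc, nodeid and open_flags so file operations can run without an inode, the release request carries a struct path instead of a vfsmount/dentry pair, and the prototypes added at the bottom are the surface CUSE builds on. Roughly, the per-open state after the change (illustrative shape, fields abridged):

#include <stdint.h>

struct fuse_conn;                     /* forward declaration, as above */

struct fuse_file_shape {              /* illustrative, not the real layout */
	struct fuse_conn *fc;         /* was: found via the inode */
	uint64_t fh;                  /* handle chosen by the server */
	uint64_t nodeid;              /* was: get_node_id(inode) */
	int      count;               /* refcount (atomic_t in the kernel) */
	uint32_t open_flags;          /* FOPEN_* bits from the OPEN reply */
};
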
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 91f7c85f1ffd..f0df55a52929 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -277,11 +277,14 @@ static void fuse_send_destroy(struct fuse_conn *fc) | |||
277 | } | 277 | } |
278 | } | 278 | } |
279 | 279 | ||
280 | static void fuse_put_super(struct super_block *sb) | 280 | static void fuse_bdi_destroy(struct fuse_conn *fc) |
281 | { | 281 | { |
282 | struct fuse_conn *fc = get_fuse_conn_super(sb); | 282 | if (fc->bdi_initialized) |
283 | bdi_destroy(&fc->bdi); | ||
284 | } | ||
283 | 285 | ||
284 | fuse_send_destroy(fc); | 286 | void fuse_conn_kill(struct fuse_conn *fc) |
287 | { | ||
285 | spin_lock(&fc->lock); | 288 | spin_lock(&fc->lock); |
286 | fc->connected = 0; | 289 | fc->connected = 0; |
287 | fc->blocked = 0; | 290 | fc->blocked = 0; |
@@ -295,7 +298,16 @@ static void fuse_put_super(struct super_block *sb) | |||
295 | list_del(&fc->entry); | 298 | list_del(&fc->entry); |
296 | fuse_ctl_remove_conn(fc); | 299 | fuse_ctl_remove_conn(fc); |
297 | mutex_unlock(&fuse_mutex); | 300 | mutex_unlock(&fuse_mutex); |
298 | bdi_destroy(&fc->bdi); | 301 | fuse_bdi_destroy(fc); |
302 | } | ||
303 | EXPORT_SYMBOL_GPL(fuse_conn_kill); | ||
304 | |||
305 | static void fuse_put_super(struct super_block *sb) | ||
306 | { | ||
307 | struct fuse_conn *fc = get_fuse_conn_super(sb); | ||
308 | |||
309 | fuse_send_destroy(fc); | ||
310 | fuse_conn_kill(fc); | ||
299 | fuse_conn_put(fc); | 311 | fuse_conn_put(fc); |
300 | } | 312 | } |
301 | 313 | ||
@@ -466,10 +478,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
466 | return 0; | 478 | return 0; |
467 | } | 479 | } |
468 | 480 | ||
469 | int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) | 481 | void fuse_conn_init(struct fuse_conn *fc) |
470 | { | 482 | { |
471 | int err; | ||
472 | |||
473 | memset(fc, 0, sizeof(*fc)); | 483 | memset(fc, 0, sizeof(*fc)); |
474 | spin_lock_init(&fc->lock); | 484 | spin_lock_init(&fc->lock); |
475 | mutex_init(&fc->inst_mutex); | 485 | mutex_init(&fc->inst_mutex); |
@@ -484,49 +494,12 @@ int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) | |||
484 | INIT_LIST_HEAD(&fc->bg_queue); | 494 | INIT_LIST_HEAD(&fc->bg_queue); |
485 | INIT_LIST_HEAD(&fc->entry); | 495 | INIT_LIST_HEAD(&fc->entry); |
486 | atomic_set(&fc->num_waiting, 0); | 496 | atomic_set(&fc->num_waiting, 0); |
487 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | ||
488 | fc->bdi.unplug_io_fn = default_unplug_io_fn; | ||
489 | /* fuse does its own writeback accounting */ | ||
490 | fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; | ||
491 | fc->khctr = 0; | 497 | fc->khctr = 0; |
492 | fc->polled_files = RB_ROOT; | 498 | fc->polled_files = RB_ROOT; |
493 | fc->dev = sb->s_dev; | ||
494 | err = bdi_init(&fc->bdi); | ||
495 | if (err) | ||
496 | goto error_mutex_destroy; | ||
497 | if (sb->s_bdev) { | ||
498 | err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", | ||
499 | MAJOR(fc->dev), MINOR(fc->dev)); | ||
500 | } else { | ||
501 | err = bdi_register_dev(&fc->bdi, fc->dev); | ||
502 | } | ||
503 | if (err) | ||
504 | goto error_bdi_destroy; | ||
505 | /* | ||
506 | * For a single fuse filesystem use max 1% of dirty + | ||
507 | * writeback threshold. | ||
508 | * | ||
509 | * This gives about 1M of write buffer for memory maps on a | ||
510 | * machine with 1G and 10% dirty_ratio, which should be more | ||
511 | * than enough. | ||
512 | * | ||
513 | * Privileged users can raise it by writing to | ||
514 | * | ||
515 | * /sys/class/bdi/<bdi>/max_ratio | ||
516 | */ | ||
517 | bdi_set_max_ratio(&fc->bdi, 1); | ||
518 | fc->reqctr = 0; | 499 | fc->reqctr = 0; |
519 | fc->blocked = 1; | 500 | fc->blocked = 1; |
520 | fc->attr_version = 1; | 501 | fc->attr_version = 1; |
521 | get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); | 502 | get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); |
522 | |||
523 | return 0; | ||
524 | |||
525 | error_bdi_destroy: | ||
526 | bdi_destroy(&fc->bdi); | ||
527 | error_mutex_destroy: | ||
528 | mutex_destroy(&fc->inst_mutex); | ||
529 | return err; | ||
530 | } | 503 | } |
531 | EXPORT_SYMBOL_GPL(fuse_conn_init); | 504 | EXPORT_SYMBOL_GPL(fuse_conn_init); |
532 | 505 | ||
@@ -539,12 +512,14 @@ void fuse_conn_put(struct fuse_conn *fc) | |||
539 | fc->release(fc); | 512 | fc->release(fc); |
540 | } | 513 | } |
541 | } | 514 | } |
515 | EXPORT_SYMBOL_GPL(fuse_conn_put); | ||
542 | 516 | ||
543 | struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) | 517 | struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) |
544 | { | 518 | { |
545 | atomic_inc(&fc->count); | 519 | atomic_inc(&fc->count); |
546 | return fc; | 520 | return fc; |
547 | } | 521 | } |
522 | EXPORT_SYMBOL_GPL(fuse_conn_get); | ||
548 | 523 | ||
549 | static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) | 524 | static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) |
550 | { | 525 | { |
@@ -797,6 +772,48 @@ static void fuse_free_conn(struct fuse_conn *fc) | |||
797 | kfree(fc); | 772 | kfree(fc); |
798 | } | 773 | } |
799 | 774 | ||
775 | static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) | ||
776 | { | ||
777 | int err; | ||
778 | |||
779 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | ||
780 | fc->bdi.unplug_io_fn = default_unplug_io_fn; | ||
781 | /* fuse does its own writeback accounting */ | ||
782 | fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; | ||
783 | |||
784 | err = bdi_init(&fc->bdi); | ||
785 | if (err) | ||
786 | return err; | ||
787 | |||
788 | fc->bdi_initialized = 1; | ||
789 | |||
790 | if (sb->s_bdev) { | ||
791 | err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", | ||
792 | MAJOR(fc->dev), MINOR(fc->dev)); | ||
793 | } else { | ||
794 | err = bdi_register_dev(&fc->bdi, fc->dev); | ||
795 | } | ||
796 | |||
797 | if (err) | ||
798 | return err; | ||
799 | |||
800 | /* | ||
801 | * For a single fuse filesystem use max 1% of dirty + | ||
802 | * writeback threshold. | ||
803 | * | ||
804 | * This gives about 1M of write buffer for memory maps on a | ||
805 | * machine with 1G and 10% dirty_ratio, which should be more | ||
806 | * than enough. | ||
807 | * | ||
808 | * Privileged users can raise it by writing to | ||
809 | * | ||
810 | * /sys/class/bdi/<bdi>/max_ratio | ||
811 | */ | ||
812 | bdi_set_max_ratio(&fc->bdi, 1); | ||
813 | |||
814 | return 0; | ||
815 | } | ||
816 | |||
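
fuse_conn_init() can no longer fail because every fallible step moved into fuse_bdi_init(), which only mounted filesystems call; the new bdi_initialized bit records whether teardown has anything to undo. A sketch of the guarded init/teardown pairing (stub names, not kernel APIs):

struct conn { int bdi_initialized; /* bdi and the rest elided */ };

extern int  bdi_setup(struct conn *c);       /* the fallible part */
extern void bdi_teardown(struct conn *c);

static int conn_setup(struct conn *c, int backed_by_fs)
{
	c->bdi_initialized = 0;                /* core init cannot fail */

	if (backed_by_fs) {                    /* CUSE skips this entirely */
		int err = bdi_setup(c);
		if (err)
			return err;            /* caller unwinds via put() */
		c->bdi_initialized = 1;
	}
	return 0;
}

static void conn_teardown(struct conn *c)
{
	if (c->bdi_initialized)                /* only undo what init did */
		bdi_teardown(c);
}
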
800 | static int fuse_fill_super(struct super_block *sb, void *data, int silent) | 817 | static int fuse_fill_super(struct super_block *sb, void *data, int silent) |
801 | { | 818 | { |
802 | struct fuse_conn *fc; | 819 | struct fuse_conn *fc; |
@@ -843,11 +860,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
843 | if (!fc) | 860 | if (!fc) |
844 | goto err_fput; | 861 | goto err_fput; |
845 | 862 | ||
846 | err = fuse_conn_init(fc, sb); | 863 | fuse_conn_init(fc); |
847 | if (err) { | 864 | |
848 | kfree(fc); | 865 | fc->dev = sb->s_dev; |
849 | goto err_fput; | 866 | err = fuse_bdi_init(fc, sb); |
850 | } | 867 | if (err) |
868 | goto err_put_conn; | ||
851 | 869 | ||
852 | fc->release = fuse_free_conn; | 870 | fc->release = fuse_free_conn; |
853 | fc->flags = d.flags; | 871 | fc->flags = d.flags; |
@@ -911,7 +929,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
911 | err_put_root: | 929 | err_put_root: |
912 | dput(root_dentry); | 930 | dput(root_dentry); |
913 | err_put_conn: | 931 | err_put_conn: |
914 | bdi_destroy(&fc->bdi); | 932 | fuse_bdi_destroy(fc); |
915 | fuse_conn_put(fc); | 933 | fuse_conn_put(fc); |
916 | err_fput: | 934 | err_fput: |
917 | fput(file); | 935 | fput(file); |
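
fuse_put_super() splits the same way: the superblock-independent teardown becomes the exported fuse_conn_kill(), so a CUSE device can take its connection down through one shared path. The ordering matters: mark the connection dead under the lock, wake every sleeper, then unregister. In outline (stub names):

struct conn_state { int connected, blocked; /* lock and queues elided */ };

extern void conn_lock(struct conn_state *c);
extern void conn_unlock(struct conn_state *c);
extern void wake_all_waiters(struct conn_state *c);
extern void unregister_conn(struct conn_state *c);

static void conn_kill(struct conn_state *c)  /* shape of fuse_conn_kill() */
{
	conn_lock(c);
	c->connected = 0;        /* new requests now fail fast */
	c->blocked   = 0;
	conn_unlock(c);
	wake_all_waiters(c);     /* unblock sleepers on the wait queues */
	unregister_conn(c);      /* global list, control fs, then the bdi */
}
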
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 3a981b7f64ca..cad957cdb1e5 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -7,6 +7,7 @@ config GFS2_FS | |||
7 | select IP_SCTP if DLM_SCTP | 7 | select IP_SCTP if DLM_SCTP |
8 | select FS_POSIX_ACL | 8 | select FS_POSIX_ACL |
9 | select CRC32 | 9 | select CRC32 |
10 | select SLOW_WORK | ||
10 | help | 11 | help |
11 | A cluster filesystem. | 12 | A cluster filesystem. |
12 | 13 | ||
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index a851ea4bdf70..3da2f1f4f738 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile | |||
@@ -1,8 +1,9 @@ | |||
1 | EXTRA_CFLAGS := -I$(src) | ||
1 | obj-$(CONFIG_GFS2_FS) += gfs2.o | 2 | obj-$(CONFIG_GFS2_FS) += gfs2.o |
2 | gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ | 3 | gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ |
3 | glops.o inode.o log.o lops.o main.o meta_io.o \ | 4 | glops.o inode.o log.o lops.o main.o meta_io.o \ |
4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ | 5 | aops.o dentry.o export.o file.o \ |
5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ | 6 | ops_fstype.o ops_inode.o quota.o \ |
6 | recovery.o rgrp.o super.o sys.o trans.o util.o | 7 | recovery.o rgrp.o super.o sys.o trans.o util.o |
7 | 8 | ||
8 | gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o | 9 | gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/aops.c index a6dde1751e17..03ebb439ace0 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/aops.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include "inode.h" | 28 | #include "inode.h" |
29 | #include "log.h" | 29 | #include "log.h" |
30 | #include "meta_io.h" | 30 | #include "meta_io.h" |
31 | #include "ops_address.h" | ||
32 | #include "quota.h" | 31 | #include "quota.h" |
33 | #include "trans.h" | 32 | #include "trans.h" |
34 | #include "rgrp.h" | 33 | #include "rgrp.h" |
@@ -781,10 +780,12 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
781 | unlock_page(page); | 780 | unlock_page(page); |
782 | page_cache_release(page); | 781 | page_cache_release(page); |
783 | 782 | ||
784 | if (inode->i_size < to) { | 783 | if (copied) { |
785 | i_size_write(inode, to); | 784 | if (inode->i_size < to) { |
786 | ip->i_disksize = inode->i_size; | 785 | i_size_write(inode, to); |
787 | di->di_size = cpu_to_be64(inode->i_size); | 786 | ip->i_disksize = inode->i_size; |
787 | } | ||
788 | gfs2_dinode_out(ip, di); | ||
788 | mark_inode_dirty(inode); | 789 | mark_inode_dirty(inode); |
789 | } | 790 | } |
790 | 791 | ||
@@ -824,7 +825,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
824 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 825 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
825 | struct buffer_head *dibh; | 826 | struct buffer_head *dibh; |
826 | struct gfs2_alloc *al = ip->i_alloc; | 827 | struct gfs2_alloc *al = ip->i_alloc; |
827 | struct gfs2_dinode *di; | ||
828 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); | 828 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); |
829 | unsigned int to = from + len; | 829 | unsigned int to = from + len; |
830 | int ret; | 830 | int ret; |
@@ -847,11 +847,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
847 | gfs2_page_add_databufs(ip, page, from, to); | 847 | gfs2_page_add_databufs(ip, page, from, to); |
848 | 848 | ||
849 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 849 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
850 | 850 | if (ret > 0) { | |
851 | if (likely(ret >= 0) && (inode->i_size > ip->i_disksize)) { | 851 | if (inode->i_size > ip->i_disksize) |
852 | di = (struct gfs2_dinode *)dibh->b_data; | 852 | ip->i_disksize = inode->i_size; |
853 | ip->i_disksize = inode->i_size; | 853 | gfs2_dinode_out(ip, dibh->b_data); |
854 | di->di_size = cpu_to_be64(inode->i_size); | ||
855 | mark_inode_dirty(inode); | 854 | mark_inode_dirty(inode); |
856 | } | 855 | } |
857 | 856 | ||
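
Both write_end paths in aops.c now skip the on-disk size update when copied is zero, so a fully failed copy cannot dirty the dinode, and they write the whole dinode back via gfs2_dinode_out() instead of patching di_size by hand. The guard in isolation, with simplified stand-in types:

#include <stdint.h>
#include <stddef.h>

struct inode_sketch { uint64_t i_size, i_disksize; };

extern void dinode_out(struct inode_sketch *ip, void *block);  /* stubs */
extern void mark_dirty(struct inode_sketch *ip);

static void write_end_update(struct inode_sketch *ip, void *dibh_data,
			     uint64_t end, size_t copied)
{
	if (!copied)                     /* nothing landed: leave disk alone */
		return;
	if (ip->i_size < end) {
		ip->i_size     = end;    /* grow, never shrink */
		ip->i_disksize = ip->i_size;
	}
	dinode_out(ip, dibh_data);       /* whole dinode, not just di_size */
	mark_dirty(ip);
}
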
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3a5d3f883e10..6d47379e794b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include "trans.h" | 25 | #include "trans.h" |
26 | #include "dir.h" | 26 | #include "dir.h" |
27 | #include "util.h" | 27 | #include "util.h" |
28 | #include "ops_address.h" | 28 | #include "trace_gfs2.h" |
29 | 29 | ||
30 | /* This doesn't need to be that large as max 64 bit pointers in a 4k | 30 | /* This doesn't need to be that large as max 64 bit pointers in a 4k |
31 | * block is 512, so __u16 is fine for that. It saves stack space to | 31 | * block is 512, so __u16 is fine for that. It saves stack space to |
@@ -136,7 +136,9 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
136 | and write it out to disk */ | 136 | and write it out to disk */ |
137 | 137 | ||
138 | unsigned int n = 1; | 138 | unsigned int n = 1; |
139 | block = gfs2_alloc_block(ip, &n); | 139 | error = gfs2_alloc_block(ip, &block, &n); |
140 | if (error) | ||
141 | goto out_brelse; | ||
140 | if (isdir) { | 142 | if (isdir) { |
141 | gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1); | 143 | gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1); |
142 | error = gfs2_dir_get_new_buffer(ip, block, &bh); | 144 | error = gfs2_dir_get_new_buffer(ip, block, &bh); |
@@ -476,8 +478,11 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | |||
476 | blks = dblks + iblks; | 478 | blks = dblks + iblks; |
477 | i = sheight; | 479 | i = sheight; |
478 | do { | 480 | do { |
481 | int error; | ||
479 | n = blks - alloced; | 482 | n = blks - alloced; |
480 | bn = gfs2_alloc_block(ip, &n); | 483 | error = gfs2_alloc_block(ip, &bn, &n); |
484 | if (error) | ||
485 | return error; | ||
481 | alloced += n; | 486 | alloced += n; |
482 | if (state != ALLOC_DATA || gfs2_is_jdata(ip)) | 487 | if (state != ALLOC_DATA || gfs2_is_jdata(ip)) |
483 | gfs2_trans_add_unrevoke(sdp, bn, n); | 488 | gfs2_trans_add_unrevoke(sdp, bn, n); |
@@ -585,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, | |||
585 | clear_buffer_mapped(bh_map); | 590 | clear_buffer_mapped(bh_map); |
586 | clear_buffer_new(bh_map); | 591 | clear_buffer_new(bh_map); |
587 | clear_buffer_boundary(bh_map); | 592 | clear_buffer_boundary(bh_map); |
593 | trace_gfs2_bmap(ip, bh_map, lblock, create, 1); | ||
588 | if (gfs2_is_dir(ip)) { | 594 | if (gfs2_is_dir(ip)) { |
589 | bsize = sdp->sd_jbsize; | 595 | bsize = sdp->sd_jbsize; |
590 | arr = sdp->sd_jheightsize; | 596 | arr = sdp->sd_jheightsize; |
@@ -619,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, | |||
619 | ret = 0; | 625 | ret = 0; |
620 | out: | 626 | out: |
621 | release_metapath(&mp); | 627 | release_metapath(&mp); |
628 | trace_gfs2_bmap(ip, bh_map, lblock, create, ret); | ||
622 | bmap_unlock(ip, create); | 629 | bmap_unlock(ip, create); |
623 | return ret; | 630 | return ret; |
624 | 631 | ||
@@ -1008,7 +1015,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) | |||
1008 | gfs2_trans_add_bh(ip->i_gl, bh, 0); | 1015 | gfs2_trans_add_bh(ip->i_gl, bh, 0); |
1009 | 1016 | ||
1010 | zero_user(page, offset, length); | 1017 | zero_user(page, offset, length); |
1011 | 1018 | mark_buffer_dirty(bh); | |
1012 | unlock: | 1019 | unlock: |
1013 | unlock_page(page); | 1020 | unlock_page(page); |
1014 | page_cache_release(page); | 1021 | page_cache_release(page); |
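
Every gfs2_alloc_block() call site in this series changes identically because the signature did: the block number used to double as the return value, leaving allocation failure nowhere to go, and it is now delivered through a pointer with an error code in the return. The call-site migration, schematically (the _new suffix is only for contrast):

#include <stdint.h>

extern int alloc_block_new(void *ip, uint64_t *bn, unsigned int *n);

static int caller_sketch(void *ip)
{
	unsigned int n = 1;
	uint64_t bn;
	int error;

	/* old:  bn = gfs2_alloc_block(ip, &n);   -- failure had no channel */
	error = alloc_block_new(ip, &bn, &n);     /* new shape */
	if (error)
		return error;                     /* propagate it upward */

	/* ... use bn (first block) and n (blocks actually allocated) ... */
	return 0;
}
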
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/dentry.c index 022c66cd5606..022c66cd5606 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/dentry.c | |||
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index aef4d0c06748..297d7e5cebad 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -803,13 +803,20 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, | |||
803 | { | 803 | { |
804 | struct gfs2_inode *ip = GFS2_I(inode); | 804 | struct gfs2_inode *ip = GFS2_I(inode); |
805 | unsigned int n = 1; | 805 | unsigned int n = 1; |
806 | u64 bn = gfs2_alloc_block(ip, &n); | 806 | u64 bn; |
807 | struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); | 807 | int error; |
808 | struct buffer_head *bh; | ||
808 | struct gfs2_leaf *leaf; | 809 | struct gfs2_leaf *leaf; |
809 | struct gfs2_dirent *dent; | 810 | struct gfs2_dirent *dent; |
810 | struct qstr name = { .name = "", .len = 0, .hash = 0 }; | 811 | struct qstr name = { .name = "", .len = 0, .hash = 0 }; |
812 | |||
813 | error = gfs2_alloc_block(ip, &bn, &n); | ||
814 | if (error) | ||
815 | return NULL; | ||
816 | bh = gfs2_meta_new(ip->i_gl, bn); | ||
811 | if (!bh) | 817 | if (!bh) |
812 | return NULL; | 818 | return NULL; |
819 | |||
813 | gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); | 820 | gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); |
814 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 821 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
815 | gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); | 822 | gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); |
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 899763aed217..07ea9529adda 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c | |||
@@ -582,8 +582,11 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) | |||
582 | struct gfs2_ea_header *ea; | 582 | struct gfs2_ea_header *ea; |
583 | unsigned int n = 1; | 583 | unsigned int n = 1; |
584 | u64 block; | 584 | u64 block; |
585 | int error; | ||
585 | 586 | ||
586 | block = gfs2_alloc_block(ip, &n); | 587 | error = gfs2_alloc_block(ip, &block, &n); |
588 | if (error) | ||
589 | return error; | ||
587 | gfs2_trans_add_unrevoke(sdp, block, 1); | 590 | gfs2_trans_add_unrevoke(sdp, block, 1); |
588 | *bhp = gfs2_meta_new(ip->i_gl, block); | 591 | *bhp = gfs2_meta_new(ip->i_gl, block); |
589 | gfs2_trans_add_bh(ip->i_gl, *bhp, 1); | 592 | gfs2_trans_add_bh(ip->i_gl, *bhp, 1); |
@@ -617,6 +620,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | |||
617 | struct gfs2_ea_request *er) | 620 | struct gfs2_ea_request *er) |
618 | { | 621 | { |
619 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 622 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
623 | int error; | ||
620 | 624 | ||
621 | ea->ea_data_len = cpu_to_be32(er->er_data_len); | 625 | ea->ea_data_len = cpu_to_be32(er->er_data_len); |
622 | ea->ea_name_len = er->er_name_len; | 626 | ea->ea_name_len = er->er_name_len; |
@@ -642,7 +646,9 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | |||
642 | int mh_size = sizeof(struct gfs2_meta_header); | 646 | int mh_size = sizeof(struct gfs2_meta_header); |
643 | unsigned int n = 1; | 647 | unsigned int n = 1; |
644 | 648 | ||
645 | block = gfs2_alloc_block(ip, &n); | 649 | error = gfs2_alloc_block(ip, &block, &n); |
650 | if (error) | ||
651 | return error; | ||
646 | gfs2_trans_add_unrevoke(sdp, block, 1); | 652 | gfs2_trans_add_unrevoke(sdp, block, 1); |
647 | bh = gfs2_meta_new(ip->i_gl, block); | 653 | bh = gfs2_meta_new(ip->i_gl, block); |
648 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 654 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
@@ -963,7 +969,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
963 | } else { | 969 | } else { |
964 | u64 blk; | 970 | u64 blk; |
965 | unsigned int n = 1; | 971 | unsigned int n = 1; |
966 | blk = gfs2_alloc_block(ip, &n); | 972 | error = gfs2_alloc_block(ip, &blk, &n); |
973 | if (error) | ||
974 | return error; | ||
967 | gfs2_trans_add_unrevoke(sdp, blk, 1); | 975 | gfs2_trans_add_unrevoke(sdp, blk, 1); |
968 | indbh = gfs2_meta_new(ip->i_gl, blk); | 976 | indbh = gfs2_meta_new(ip->i_gl, blk); |
969 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); | 977 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/export.c index 9200ef221716..9200ef221716 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/export.c | |||
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/file.c index 5d82e91887e3..73318a3ce6f1 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/file.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include "trans.h" | 39 | #include "trans.h" |
40 | #include "util.h" | 40 | #include "util.h" |
41 | #include "eaops.h" | 41 | #include "eaops.h" |
42 | #include "ops_address.h" | ||
43 | 42 | ||
44 | /** | 43 | /** |
45 | * gfs2_llseek - seek to a location in a file | 44 | * gfs2_llseek - seek to a location in a file |
@@ -425,33 +424,36 @@ static struct vm_operations_struct gfs2_vm_ops = { | |||
425 | .page_mkwrite = gfs2_page_mkwrite, | 424 | .page_mkwrite = gfs2_page_mkwrite, |
426 | }; | 425 | }; |
427 | 426 | ||
428 | |||
429 | /** | 427 | /** |
430 | * gfs2_mmap - | 428 | * gfs2_mmap - |
431 | * @file: The file to map | 429 | * @file: The file to map |
432 | * @vma: The VMA which describes the mapping | 430 | * @vma: The VMA which describes the mapping |
433 | * | 431 | * |
434 | * Returns: 0 or error code | 432 | * There is no need to get a lock here unless we should be updating |
433 | * atime. We ignore any locking errors since the only consequence is | ||
434 | * a missed atime update (which will just be deferred until later). | ||
435 | * | ||
436 | * Returns: 0 | ||
435 | */ | 437 | */ |
436 | 438 | ||
437 | static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | 439 | static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) |
438 | { | 440 | { |
439 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | 441 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); |
440 | struct gfs2_holder i_gh; | ||
441 | int error; | ||
442 | 442 | ||
443 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); | 443 | if (!(file->f_flags & O_NOATIME)) { |
444 | error = gfs2_glock_nq(&i_gh); | 444 | struct gfs2_holder i_gh; |
445 | if (error) { | 445 | int error; |
446 | gfs2_holder_uninit(&i_gh); | ||
447 | return error; | ||
448 | } | ||
449 | 446 | ||
447 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); | ||
448 | error = gfs2_glock_nq(&i_gh); | ||
449 | file_accessed(file); | ||
450 | if (error == 0) | ||
451 | gfs2_glock_dq_uninit(&i_gh); | ||
452 | } | ||
450 | vma->vm_ops = &gfs2_vm_ops; | 453 | vma->vm_ops = &gfs2_vm_ops; |
454 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
451 | 455 | ||
452 | gfs2_glock_dq_uninit(&i_gh); | 456 | return 0; |
453 | |||
454 | return error; | ||
455 | } | 457 | } |
456 | 458 | ||
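
gfs2_mmap() used to fail if it could not take a shared glock, yet the lock exists only so atime gets updated; it is now taken exclusively, skipped entirely under O_NOATIME, and a locking error is tolerated since the only consequence is a deferred atime. The control flow reduced to its shape (stub names and an illustrative flag value):

#define O_NOATIME_SKETCH 01000000     /* illustrative flag value */

extern int  take_glock(void *gl);     /* may fail; stands in for nq */
extern void drop_glock(void *gl);
extern void touch_atime_sketch(void *file);   /* file_accessed() analogue */

static int mmap_sketch(void *file, int f_flags, void *gl)
{
	if (!(f_flags & O_NOATIME_SKETCH)) {
		int error = take_glock(gl);

		touch_atime_sketch(file);
		if (error == 0)
			drop_glock(gl);
		/* a failed lock is tolerated: atime is merely deferred */
	}
	/* install vm_ops; always succeeds */
	return 0;
}
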
457 | /** | 459 | /** |
@@ -692,12 +694,10 @@ static void do_unflock(struct file *file, struct file_lock *fl) | |||
692 | 694 | ||
693 | static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) | 695 | static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) |
694 | { | 696 | { |
695 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | ||
696 | |||
697 | if (!(fl->fl_flags & FL_FLOCK)) | 697 | if (!(fl->fl_flags & FL_FLOCK)) |
698 | return -ENOLCK; | 698 | return -ENOLCK; |
699 | if (__mandatory_lock(&ip->i_inode)) | 699 | if (fl->fl_type & LOCK_MAND) |
700 | return -ENOLCK; | 700 | return -EOPNOTSUPP; |
701 | 701 | ||
702 | if (fl->fl_type == F_UNLCK) { | 702 | if (fl->fl_type == F_UNLCK) { |
703 | do_unflock(file, fl); | 703 | do_unflock(file, fl); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ff4981090489..297421c0427a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -39,6 +39,8 @@ | |||
39 | #include "super.h" | 39 | #include "super.h" |
40 | #include "util.h" | 40 | #include "util.h" |
41 | #include "bmap.h" | 41 | #include "bmap.h" |
42 | #define CREATE_TRACE_POINTS | ||
43 | #include "trace_gfs2.h" | ||
42 | 44 | ||
43 | struct gfs2_gl_hash_bucket { | 45 | struct gfs2_gl_hash_bucket { |
44 | struct hlist_head hb_list; | 46 | struct hlist_head hb_list; |
@@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl) | |||
155 | 157 | ||
156 | if (aspace) | 158 | if (aspace) |
157 | gfs2_aspace_put(aspace); | 159 | gfs2_aspace_put(aspace); |
158 | 160 | trace_gfs2_glock_put(gl); | |
159 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); | 161 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); |
160 | } | 162 | } |
161 | 163 | ||
@@ -317,14 +319,17 @@ restart: | |||
317 | return 2; | 319 | return 2; |
318 | gh->gh_error = ret; | 320 | gh->gh_error = ret; |
319 | list_del_init(&gh->gh_list); | 321 | list_del_init(&gh->gh_list); |
322 | trace_gfs2_glock_queue(gh, 0); | ||
320 | gfs2_holder_wake(gh); | 323 | gfs2_holder_wake(gh); |
321 | goto restart; | 324 | goto restart; |
322 | } | 325 | } |
323 | set_bit(HIF_HOLDER, &gh->gh_iflags); | 326 | set_bit(HIF_HOLDER, &gh->gh_iflags); |
327 | trace_gfs2_promote(gh, 1); | ||
324 | gfs2_holder_wake(gh); | 328 | gfs2_holder_wake(gh); |
325 | goto restart; | 329 | goto restart; |
326 | } | 330 | } |
327 | set_bit(HIF_HOLDER, &gh->gh_iflags); | 331 | set_bit(HIF_HOLDER, &gh->gh_iflags); |
332 | trace_gfs2_promote(gh, 0); | ||
328 | gfs2_holder_wake(gh); | 333 | gfs2_holder_wake(gh); |
329 | continue; | 334 | continue; |
330 | } | 335 | } |
@@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret) | |||
354 | else | 359 | else |
355 | continue; | 360 | continue; |
356 | list_del_init(&gh->gh_list); | 361 | list_del_init(&gh->gh_list); |
362 | trace_gfs2_glock_queue(gh, 0); | ||
357 | gfs2_holder_wake(gh); | 363 | gfs2_holder_wake(gh); |
358 | } | 364 | } |
359 | } | 365 | } |
@@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) | |||
422 | int rv; | 428 | int rv; |
423 | 429 | ||
424 | spin_lock(&gl->gl_spin); | 430 | spin_lock(&gl->gl_spin); |
431 | trace_gfs2_glock_state_change(gl, state); | ||
425 | state_change(gl, state); | 432 | state_change(gl, state); |
426 | gh = find_first_waiter(gl); | 433 | gh = find_first_waiter(gl); |
427 | 434 | ||
@@ -796,22 +803,37 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) | |||
796 | gh->gh_ip = 0; | 803 | gh->gh_ip = 0; |
797 | } | 804 | } |
798 | 805 | ||
799 | static int just_schedule(void *word) | 806 | /** |
807 | * gfs2_glock_holder_wait | ||
808 | * @word: unused | ||
809 | * | ||
810 | * This function and gfs2_glock_demote_wait both show up in the WCHAN | ||
811 | * field. Thus I've separated these otherwise identical functions in | ||
812 | * order to be more informative to the user. | ||
813 | */ | ||
814 | |||
815 | static int gfs2_glock_holder_wait(void *word) | ||
800 | { | 816 | { |
801 | schedule(); | 817 | schedule(); |
802 | return 0; | 818 | return 0; |
803 | } | 819 | } |
804 | 820 | ||
821 | static int gfs2_glock_demote_wait(void *word) | ||
822 | { | ||
823 | schedule(); | ||
824 | return 0; | ||
825 | } | ||
826 | |||
805 | static void wait_on_holder(struct gfs2_holder *gh) | 827 | static void wait_on_holder(struct gfs2_holder *gh) |
806 | { | 828 | { |
807 | might_sleep(); | 829 | might_sleep(); |
808 | wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); | 830 | wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); |
809 | } | 831 | } |
810 | 832 | ||
811 | static void wait_on_demote(struct gfs2_glock *gl) | 833 | static void wait_on_demote(struct gfs2_glock *gl) |
812 | { | 834 | { |
813 | might_sleep(); | 835 | might_sleep(); |
814 | wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); | 836 | wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); |
815 | } | 837 | } |
816 | 838 | ||
817 | /** | 839 | /** |
@@ -836,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
836 | gl->gl_demote_state != state) { | 858 | gl->gl_demote_state != state) { |
837 | gl->gl_demote_state = LM_ST_UNLOCKED; | 859 | gl->gl_demote_state = LM_ST_UNLOCKED; |
838 | } | 860 | } |
861 | trace_gfs2_demote_rq(gl); | ||
839 | } | 862 | } |
840 | 863 | ||
841 | /** | 864 | /** |
@@ -921,6 +944,7 @@ fail: | |||
921 | goto do_cancel; | 944 | goto do_cancel; |
922 | return; | 945 | return; |
923 | } | 946 | } |
947 | trace_gfs2_glock_queue(gh, 1); | ||
924 | list_add_tail(&gh->gh_list, insert_pt); | 948 | list_add_tail(&gh->gh_list, insert_pt); |
925 | do_cancel: | 949 | do_cancel: |
926 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); | 950 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); |
@@ -1017,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1017 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) | 1041 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) |
1018 | fast_path = 1; | 1042 | fast_path = 1; |
1019 | } | 1043 | } |
1044 | trace_gfs2_glock_queue(gh, 0); | ||
1020 | spin_unlock(&gl->gl_spin); | 1045 | spin_unlock(&gl->gl_spin); |
1021 | if (likely(fast_path)) | 1046 | if (likely(fast_path)) |
1022 | return; | 1047 | return; |
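A note on the wait-callback split in the glock.c hunk above: wait_on_bit() sleeps inside the action function it is given, so that function's name is what a blocked task reports in its WCHAN field. Splitting the single just_schedule() into gfs2_glock_holder_wait() and gfs2_glock_demote_wait() therefore makes the two wait sites distinguishable to the user, exactly as the new comment says. A minimal sketch of the pattern, with hypothetical names:

	/* Sketch only: the task sleeps inside the action callback, so the
	 * callback's name is what WCHAN shows for the blocked task. */
	static int my_holder_wait(void *word)
	{
		schedule();	/* sleep; mode was set by wait_on_bit() */
		return 0;	/* 0 = keep waiting; nonzero aborts the wait */
	}

	static void wait_for_holder(struct gfs2_holder *gh)
	{
		might_sleep();
		wait_on_bit(&gh->gh_iflags, HIF_WAIT, my_holder_wait,
			    TASK_UNINTERRUPTIBLE);
	}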
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 70f87f43afa2..d5e4ab155ca0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -310,24 +310,6 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) | |||
310 | } | 310 | } |
311 | 311 | ||
312 | /** | 312 | /** |
313 | * rgrp_go_dump - print out an rgrp | ||
314 | * @seq: The iterator | ||
315 | * @gl: The glock in question | ||
316 | * | ||
317 | */ | ||
318 | |||
319 | static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | ||
320 | { | ||
321 | const struct gfs2_rgrpd *rgd = gl->gl_object; | ||
322 | if (rgd == NULL) | ||
323 | return 0; | ||
324 | gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", | ||
325 | (unsigned long long)rgd->rd_addr, rgd->rd_flags, | ||
326 | rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); | ||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | /** | ||
331 | * trans_go_sync - promote/demote the transaction glock | 313 | * trans_go_sync - promote/demote the transaction glock |
332 | * @gl: the glock | 314 | * @gl: the glock |
333 | * @state: the requested state | 315 | * @state: the requested state |
@@ -410,7 +392,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { | |||
410 | .go_demote_ok = rgrp_go_demote_ok, | 392 | .go_demote_ok = rgrp_go_demote_ok, |
411 | .go_lock = rgrp_go_lock, | 393 | .go_lock = rgrp_go_lock, |
412 | .go_unlock = rgrp_go_unlock, | 394 | .go_unlock = rgrp_go_unlock, |
413 | .go_dump = rgrp_go_dump, | 395 | .go_dump = gfs2_rgrp_dump, |
414 | .go_type = LM_TYPE_RGRP, | 396 | .go_type = LM_TYPE_RGRP, |
415 | .go_min_hold_time = HZ / 5, | 397 | .go_min_hold_time = HZ / 5, |
416 | }; | 398 | }; |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 399d1b978049..225347fbff3c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/workqueue.h> | 14 | #include <linux/workqueue.h> |
15 | #include <linux/slow-work.h> | ||
15 | #include <linux/dlm.h> | 16 | #include <linux/dlm.h> |
16 | #include <linux/buffer_head.h> | 17 | #include <linux/buffer_head.h> |
17 | 18 | ||
@@ -63,9 +64,12 @@ struct gfs2_log_element { | |||
63 | const struct gfs2_log_operations *le_ops; | 64 | const struct gfs2_log_operations *le_ops; |
64 | }; | 65 | }; |
65 | 66 | ||
67 | #define GBF_FULL 1 | ||
68 | |||
66 | struct gfs2_bitmap { | 69 | struct gfs2_bitmap { |
67 | struct buffer_head *bi_bh; | 70 | struct buffer_head *bi_bh; |
68 | char *bi_clone; | 71 | char *bi_clone; |
72 | unsigned long bi_flags; | ||
69 | u32 bi_offset; | 73 | u32 bi_offset; |
70 | u32 bi_start; | 74 | u32 bi_start; |
71 | u32 bi_len; | 75 | u32 bi_len; |
@@ -90,10 +94,11 @@ struct gfs2_rgrpd { | |||
90 | struct gfs2_sbd *rd_sbd; | 94 | struct gfs2_sbd *rd_sbd; |
91 | unsigned int rd_bh_count; | 95 | unsigned int rd_bh_count; |
92 | u32 rd_last_alloc; | 96 | u32 rd_last_alloc; |
93 | unsigned char rd_flags; | 97 | u32 rd_flags; |
94 | #define GFS2_RDF_CHECK 0x01 /* Need to check for unlinked inodes */ | 98 | #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ |
95 | #define GFS2_RDF_NOALLOC 0x02 /* rg prohibits allocation */ | 99 | #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ |
96 | #define GFS2_RDF_UPTODATE 0x04 /* rg is up to date */ | 100 | #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ |
101 | #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ | ||
97 | }; | 102 | }; |
98 | 103 | ||
99 | enum gfs2_state_bits { | 104 | enum gfs2_state_bits { |
@@ -376,11 +381,11 @@ struct gfs2_journal_extent { | |||
376 | struct gfs2_jdesc { | 381 | struct gfs2_jdesc { |
377 | struct list_head jd_list; | 382 | struct list_head jd_list; |
378 | struct list_head extent_list; | 383 | struct list_head extent_list; |
379 | 384 | struct slow_work jd_work; | |
380 | struct inode *jd_inode; | 385 | struct inode *jd_inode; |
386 | unsigned long jd_flags; | ||
387 | #define JDF_RECOVERY 1 | ||
381 | unsigned int jd_jid; | 388 | unsigned int jd_jid; |
382 | int jd_dirty; | ||
383 | |||
384 | unsigned int jd_blocks; | 389 | unsigned int jd_blocks; |
385 | }; | 390 | }; |
386 | 391 | ||
@@ -390,9 +395,6 @@ struct gfs2_statfs_change_host { | |||
390 | s64 sc_dinodes; | 395 | s64 sc_dinodes; |
391 | }; | 396 | }; |
392 | 397 | ||
393 | #define GFS2_GLOCKD_DEFAULT 1 | ||
394 | #define GFS2_GLOCKD_MAX 16 | ||
395 | |||
396 | #define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF | 398 | #define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF |
397 | #define GFS2_QUOTA_OFF 0 | 399 | #define GFS2_QUOTA_OFF 0 |
398 | #define GFS2_QUOTA_ACCOUNT 1 | 400 | #define GFS2_QUOTA_ACCOUNT 1 |
@@ -418,6 +420,7 @@ struct gfs2_args { | |||
418 | unsigned int ar_data:2; /* ordered/writeback */ | 420 | unsigned int ar_data:2; /* ordered/writeback */ |
419 | unsigned int ar_meta:1; /* mount metafs */ | 421 | unsigned int ar_meta:1; /* mount metafs */ |
420 | unsigned int ar_discard:1; /* discard requests */ | 422 | unsigned int ar_discard:1; /* discard requests */ |
423 | int ar_commit; /* Commit interval */ | ||
421 | }; | 424 | }; |
422 | 425 | ||
423 | struct gfs2_tune { | 426 | struct gfs2_tune { |
@@ -426,7 +429,6 @@ struct gfs2_tune { | |||
426 | unsigned int gt_incore_log_blocks; | 429 | unsigned int gt_incore_log_blocks; |
427 | unsigned int gt_log_flush_secs; | 430 | unsigned int gt_log_flush_secs; |
428 | 431 | ||
429 | unsigned int gt_recoverd_secs; | ||
430 | unsigned int gt_logd_secs; | 432 | unsigned int gt_logd_secs; |
431 | 433 | ||
432 | unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ | 434 | unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ |
@@ -447,6 +449,7 @@ enum { | |||
447 | SDF_JOURNAL_LIVE = 1, | 449 | SDF_JOURNAL_LIVE = 1, |
448 | SDF_SHUTDOWN = 2, | 450 | SDF_SHUTDOWN = 2, |
449 | SDF_NOBARRIERS = 3, | 451 | SDF_NOBARRIERS = 3, |
452 | SDF_NORECOVERY = 4, | ||
450 | }; | 453 | }; |
451 | 454 | ||
452 | #define GFS2_FSNAME_LEN 256 | 455 | #define GFS2_FSNAME_LEN 256 |
@@ -493,7 +496,6 @@ struct lm_lockstruct { | |||
493 | unsigned long ls_flags; | 496 | unsigned long ls_flags; |
494 | dlm_lockspace_t *ls_dlm; | 497 | dlm_lockspace_t *ls_dlm; |
495 | 498 | ||
496 | int ls_recover_jid; | ||
497 | int ls_recover_jid_done; | 499 | int ls_recover_jid_done; |
498 | int ls_recover_jid_status; | 500 | int ls_recover_jid_status; |
499 | }; | 501 | }; |
@@ -582,7 +584,6 @@ struct gfs2_sbd { | |||
582 | 584 | ||
583 | /* Daemon stuff */ | 585 | /* Daemon stuff */ |
584 | 586 | ||
585 | struct task_struct *sd_recoverd_process; | ||
586 | struct task_struct *sd_logd_process; | 587 | struct task_struct *sd_logd_process; |
587 | struct task_struct *sd_quotad_process; | 588 | struct task_struct *sd_quotad_process; |
588 | 589 | ||
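The rd_flags change above widens the field from a char to a u32 so that a single word can carry both the on-disk resource-group flags (low bits) and in-core state (the top nibble, covered by GFS2_RDF_MASK). An illustrative helper for the masking idiom (not part of the patch) might look like:

	/* Illustration only: refresh the low (on-disk) bits of rd_flags
	 * while preserving internal high-nibble state such as
	 * GFS2_RDF_UPTODATE. */
	static inline void rd_flags_refresh(struct gfs2_rgrpd *rgd, u32 ondisk)
	{
		rgd->rd_flags = (rgd->rd_flags & GFS2_RDF_MASK) |
				(ondisk & ~GFS2_RDF_MASK);
	}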
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5a31d426116f..2f94bd723698 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include "inode.h" | 30 | #include "inode.h" |
31 | #include "log.h" | 31 | #include "log.h" |
32 | #include "meta_io.h" | 32 | #include "meta_io.h" |
33 | #include "ops_address.h" | ||
34 | #include "quota.h" | 33 | #include "quota.h" |
35 | #include "rgrp.h" | 34 | #include "rgrp.h" |
36 | #include "trans.h" | 35 | #include "trans.h" |
@@ -1047,154 +1046,7 @@ fail: | |||
1047 | return ERR_PTR(error); | 1046 | return ERR_PTR(error); |
1048 | } | 1047 | } |
1049 | 1048 | ||
1050 | /** | 1049 | static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) |
1051 | * gfs2_rmdiri - Remove a directory | ||
1052 | * @dip: The parent directory of the directory to be removed | ||
1053 | * @name: The name of the directory to be removed | ||
1054 | * @ip: The GFS2 inode of the directory to be removed | ||
1055 | * | ||
1056 | * Assumes Glocks on dip and ip are held | ||
1057 | * | ||
1058 | * Returns: errno | ||
1059 | */ | ||
1060 | |||
1061 | int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | ||
1062 | struct gfs2_inode *ip) | ||
1063 | { | ||
1064 | struct qstr dotname; | ||
1065 | int error; | ||
1066 | |||
1067 | if (ip->i_entries != 2) { | ||
1068 | if (gfs2_consist_inode(ip)) | ||
1069 | gfs2_dinode_print(ip); | ||
1070 | return -EIO; | ||
1071 | } | ||
1072 | |||
1073 | error = gfs2_dir_del(dip, name); | ||
1074 | if (error) | ||
1075 | return error; | ||
1076 | |||
1077 | error = gfs2_change_nlink(dip, -1); | ||
1078 | if (error) | ||
1079 | return error; | ||
1080 | |||
1081 | gfs2_str2qstr(&dotname, "."); | ||
1082 | error = gfs2_dir_del(ip, &dotname); | ||
1083 | if (error) | ||
1084 | return error; | ||
1085 | |||
1086 | gfs2_str2qstr(&dotname, ".."); | ||
1087 | error = gfs2_dir_del(ip, &dotname); | ||
1088 | if (error) | ||
1089 | return error; | ||
1090 | |||
1091 | /* It looks odd, but it really should be done twice */ | ||
1092 | error = gfs2_change_nlink(ip, -1); | ||
1093 | if (error) | ||
1094 | return error; | ||
1095 | |||
1096 | error = gfs2_change_nlink(ip, -1); | ||
1097 | if (error) | ||
1098 | return error; | ||
1099 | |||
1100 | return error; | ||
1101 | } | ||
1102 | |||
1103 | /* | ||
1104 | * gfs2_unlink_ok - check to see that an inode is still in a directory | ||
1105 | * @dip: the directory | ||
1106 | * @name: the name of the file | ||
1107 | * @ip: the inode | ||
1108 | * | ||
1109 | * Assumes that the lock on (at least) @dip is held. | ||
1110 | * | ||
1111 | * Returns: 0 if the parent/child relationship is correct, errno if it isn't | ||
1112 | */ | ||
1113 | |||
1114 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | ||
1115 | const struct gfs2_inode *ip) | ||
1116 | { | ||
1117 | int error; | ||
1118 | |||
1119 | if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) | ||
1120 | return -EPERM; | ||
1121 | |||
1122 | if ((dip->i_inode.i_mode & S_ISVTX) && | ||
1123 | dip->i_inode.i_uid != current_fsuid() && | ||
1124 | ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) | ||
1125 | return -EPERM; | ||
1126 | |||
1127 | if (IS_APPEND(&dip->i_inode)) | ||
1128 | return -EPERM; | ||
1129 | |||
1130 | error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); | ||
1131 | if (error) | ||
1132 | return error; | ||
1133 | |||
1134 | error = gfs2_dir_check(&dip->i_inode, name, ip); | ||
1135 | if (error) | ||
1136 | return error; | ||
1137 | |||
1138 | return 0; | ||
1139 | } | ||
1140 | |||
1141 | /** | ||
1142 | * gfs2_readlinki - return the contents of a symlink | ||
1143 | * @ip: the symlink's inode | ||
1144 | * @buf: a pointer to the buffer to be filled | ||
1145 | * @len: a pointer to the length of @buf | ||
1146 | * | ||
1147 | * If @buf is too small, a piece of memory is kmalloc()ed and needs | ||
1148 | * to be freed by the caller. | ||
1149 | * | ||
1150 | * Returns: errno | ||
1151 | */ | ||
1152 | |||
1153 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | ||
1154 | { | ||
1155 | struct gfs2_holder i_gh; | ||
1156 | struct buffer_head *dibh; | ||
1157 | unsigned int x; | ||
1158 | int error; | ||
1159 | |||
1160 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); | ||
1161 | error = gfs2_glock_nq(&i_gh); | ||
1162 | if (error) { | ||
1163 | gfs2_holder_uninit(&i_gh); | ||
1164 | return error; | ||
1165 | } | ||
1166 | |||
1167 | if (!ip->i_disksize) { | ||
1168 | gfs2_consist_inode(ip); | ||
1169 | error = -EIO; | ||
1170 | goto out; | ||
1171 | } | ||
1172 | |||
1173 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1174 | if (error) | ||
1175 | goto out; | ||
1176 | |||
1177 | x = ip->i_disksize + 1; | ||
1178 | if (x > *len) { | ||
1179 | *buf = kmalloc(x, GFP_NOFS); | ||
1180 | if (!*buf) { | ||
1181 | error = -ENOMEM; | ||
1182 | goto out_brelse; | ||
1183 | } | ||
1184 | } | ||
1185 | |||
1186 | memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); | ||
1187 | *len = x; | ||
1188 | |||
1189 | out_brelse: | ||
1190 | brelse(dibh); | ||
1191 | out: | ||
1192 | gfs2_glock_dq_uninit(&i_gh); | ||
1193 | return error; | ||
1194 | } | ||
1195 | |||
1196 | static int | ||
1197 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | ||
1198 | { | 1050 | { |
1199 | struct buffer_head *dibh; | 1051 | struct buffer_head *dibh; |
1200 | int error; | 1052 | int error; |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index c30be2b66580..c341aaf67adb 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -11,8 +11,16 @@ | |||
11 | #define __INODE_DOT_H__ | 11 | #define __INODE_DOT_H__ |
12 | 12 | ||
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/buffer_head.h> | ||
15 | #include <linux/mm.h> | ||
14 | #include "util.h" | 16 | #include "util.h" |
15 | 17 | ||
18 | extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | ||
19 | extern int gfs2_internal_read(struct gfs2_inode *ip, | ||
20 | struct file_ra_state *ra_state, | ||
21 | char *buf, loff_t *pos, unsigned size); | ||
22 | extern void gfs2_set_aops(struct inode *inode); | ||
23 | |||
16 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) | 24 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) |
17 | { | 25 | { |
18 | return !ip->i_height; | 26 | return !ip->i_height; |
@@ -73,30 +81,26 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
73 | } | 81 | } |
74 | 82 | ||
75 | 83 | ||
76 | void gfs2_set_iop(struct inode *inode); | 84 | extern void gfs2_set_iop(struct inode *inode); |
77 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 85 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
78 | u64 no_addr, u64 no_formal_ino, | 86 | u64 no_addr, u64 no_formal_ino, |
79 | int skip_freeing); | 87 | int skip_freeing); |
80 | struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); | 88 | extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); |
81 | 89 | ||
82 | int gfs2_inode_refresh(struct gfs2_inode *ip); | 90 | extern int gfs2_inode_refresh(struct gfs2_inode *ip); |
83 | 91 | ||
84 | int gfs2_dinode_dealloc(struct gfs2_inode *inode); | 92 | extern int gfs2_dinode_dealloc(struct gfs2_inode *inode); |
85 | int gfs2_change_nlink(struct gfs2_inode *ip, int diff); | 93 | extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff); |
86 | struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | 94 | extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, |
87 | int is_root); | 95 | int is_root); |
88 | struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | 96 | extern struct inode *gfs2_createi(struct gfs2_holder *ghs, |
89 | unsigned int mode, dev_t dev); | 97 | const struct qstr *name, |
90 | int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | 98 | unsigned int mode, dev_t dev); |
91 | struct gfs2_inode *ip); | 99 | extern int gfs2_permission(struct inode *inode, int mask); |
92 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | 100 | extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); |
93 | const struct gfs2_inode *ip); | 101 | extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
94 | int gfs2_permission(struct inode *inode, int mask); | 102 | extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | 103 | extern void gfs2_dinode_print(const struct gfs2_inode *ip); |
96 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); | ||
97 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | ||
98 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | ||
99 | void gfs2_dinode_print(const struct gfs2_inode *ip); | ||
100 | 104 | ||
101 | extern const struct inode_operations gfs2_file_iops; | 105 | extern const struct inode_operations gfs2_file_iops; |
102 | extern const struct inode_operations gfs2_dir_iops; | 106 | extern const struct inode_operations gfs2_dir_iops; |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 98918a756410..13c6237c5f67 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "meta_io.h" | 28 | #include "meta_io.h" |
29 | #include "util.h" | 29 | #include "util.h" |
30 | #include "dir.h" | 30 | #include "dir.h" |
31 | #include "trace_gfs2.h" | ||
31 | 32 | ||
32 | #define PULL 1 | 33 | #define PULL 1 |
33 | 34 | ||
@@ -120,7 +121,7 @@ __acquires(&sdp->sd_log_lock) | |||
120 | lock_buffer(bh); | 121 | lock_buffer(bh); |
121 | if (test_clear_buffer_dirty(bh)) { | 122 | if (test_clear_buffer_dirty(bh)) { |
122 | bh->b_end_io = end_buffer_write_sync; | 123 | bh->b_end_io = end_buffer_write_sync; |
123 | submit_bh(WRITE, bh); | 124 | submit_bh(WRITE_SYNC_PLUG, bh); |
124 | } else { | 125 | } else { |
125 | unlock_buffer(bh); | 126 | unlock_buffer(bh); |
126 | brelse(bh); | 127 | brelse(bh); |
@@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) | |||
313 | gfs2_log_lock(sdp); | 314 | gfs2_log_lock(sdp); |
314 | } | 315 | } |
315 | atomic_sub(blks, &sdp->sd_log_blks_free); | 316 | atomic_sub(blks, &sdp->sd_log_blks_free); |
317 | trace_gfs2_log_blocks(sdp, -blks); | ||
316 | gfs2_log_unlock(sdp); | 318 | gfs2_log_unlock(sdp); |
317 | mutex_unlock(&sdp->sd_log_reserve_mutex); | 319 | mutex_unlock(&sdp->sd_log_reserve_mutex); |
318 | 320 | ||
@@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) | |||
333 | 335 | ||
334 | gfs2_log_lock(sdp); | 336 | gfs2_log_lock(sdp); |
335 | atomic_add(blks, &sdp->sd_log_blks_free); | 337 | atomic_add(blks, &sdp->sd_log_blks_free); |
338 | trace_gfs2_log_blocks(sdp, blks); | ||
336 | gfs2_assert_withdraw(sdp, | 339 | gfs2_assert_withdraw(sdp, |
337 | atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); | 340 | atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); |
338 | gfs2_log_unlock(sdp); | 341 | gfs2_log_unlock(sdp); |
@@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) | |||
558 | 561 | ||
559 | gfs2_log_lock(sdp); | 562 | gfs2_log_lock(sdp); |
560 | atomic_add(dist, &sdp->sd_log_blks_free); | 563 | atomic_add(dist, &sdp->sd_log_blks_free); |
564 | trace_gfs2_log_blocks(sdp, dist); | ||
561 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); | 565 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); |
562 | gfs2_log_unlock(sdp); | 566 | gfs2_log_unlock(sdp); |
563 | 567 | ||
@@ -604,7 +608,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
604 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) | 608 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
605 | goto skip_barrier; | 609 | goto skip_barrier; |
606 | get_bh(bh); | 610 | get_bh(bh); |
607 | submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); | 611 | submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh); |
608 | wait_on_buffer(bh); | 612 | wait_on_buffer(bh); |
609 | if (buffer_eopnotsupp(bh)) { | 613 | if (buffer_eopnotsupp(bh)) { |
610 | clear_buffer_eopnotsupp(bh); | 614 | clear_buffer_eopnotsupp(bh); |
@@ -664,7 +668,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) | |||
664 | lock_buffer(bh); | 668 | lock_buffer(bh); |
665 | if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { | 669 | if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { |
666 | bh->b_end_io = end_buffer_write_sync; | 670 | bh->b_end_io = end_buffer_write_sync; |
667 | submit_bh(WRITE, bh); | 671 | submit_bh(WRITE_SYNC_PLUG, bh); |
668 | } else { | 672 | } else { |
669 | unlock_buffer(bh); | 673 | unlock_buffer(bh); |
670 | brelse(bh); | 674 | brelse(bh); |
@@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
715 | up_write(&sdp->sd_log_flush_lock); | 719 | up_write(&sdp->sd_log_flush_lock); |
716 | return; | 720 | return; |
717 | } | 721 | } |
722 | trace_gfs2_log_flush(sdp, 1); | ||
718 | 723 | ||
719 | ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); | 724 | ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); |
720 | INIT_LIST_HEAD(&ai->ai_ail1_list); | 725 | INIT_LIST_HEAD(&ai->ai_ail1_list); |
@@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
746 | else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ | 751 | else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ |
747 | gfs2_log_lock(sdp); | 752 | gfs2_log_lock(sdp); |
748 | atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ | 753 | atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ |
754 | trace_gfs2_log_blocks(sdp, -1); | ||
749 | gfs2_log_unlock(sdp); | 755 | gfs2_log_unlock(sdp); |
750 | log_write_header(sdp, 0, PULL); | 756 | log_write_header(sdp, 0, PULL); |
751 | } | 757 | } |
@@ -763,8 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
763 | ai = NULL; | 769 | ai = NULL; |
764 | } | 770 | } |
765 | gfs2_log_unlock(sdp); | 771 | gfs2_log_unlock(sdp); |
766 | 772 | trace_gfs2_log_flush(sdp, 0); | |
767 | sdp->sd_vfs->s_dirt = 0; | ||
768 | up_write(&sdp->sd_log_flush_lock); | 773 | up_write(&sdp->sd_log_flush_lock); |
769 | 774 | ||
770 | kfree(ai); | 775 | kfree(ai); |
@@ -788,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
788 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); | 793 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); |
789 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; | 794 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; |
790 | atomic_add(unused, &sdp->sd_log_blks_free); | 795 | atomic_add(unused, &sdp->sd_log_blks_free); |
796 | trace_gfs2_log_blocks(sdp, unused); | ||
791 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= | 797 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= |
792 | sdp->sd_jdesc->jd_blocks); | 798 | sdp->sd_jdesc->jd_blocks); |
793 | sdp->sd_log_blks_reserved = reserved; | 799 | sdp->sd_log_blks_reserved = reserved; |
@@ -823,7 +829,6 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
823 | log_refund(sdp, tr); | 829 | log_refund(sdp, tr); |
824 | buf_lo_incore_commit(sdp, tr); | 830 | buf_lo_incore_commit(sdp, tr); |
825 | 831 | ||
826 | sdp->sd_vfs->s_dirt = 1; | ||
827 | up_read(&sdp->sd_log_flush_lock); | 832 | up_read(&sdp->sd_log_flush_lock); |
828 | 833 | ||
829 | gfs2_log_lock(sdp); | 834 | gfs2_log_lock(sdp); |
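log.c now pairs every adjustment of sd_log_blks_free with a trace_gfs2_log_blocks() call, using tracepoints from the new trace_gfs2.h header (glock.c defines CREATE_TRACE_POINTS before including it, as shown earlier). For readers unfamiliar with the mechanism, here is a hedged sketch of how such a tracepoint header is typically declared; all names below are invented, not GFS2's actual definitions:

	#undef TRACE_SYSTEM
	#define TRACE_SYSTEM myfs

	#if !defined(_TRACE_MYFS_H) || defined(TRACE_HEADER_MULTI_READ)
	#define _TRACE_MYFS_H

	#include <linux/tracepoint.h>

	TRACE_EVENT(myfs_log_blocks,
		TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
		TP_ARGS(sdp, blocks),
		TP_STRUCT__entry(
			__field(dev_t, dev)
			__field(int, blocks)
		),
		TP_fast_assign(
			__entry->dev = sdp->sd_vfs->s_dev;
			__entry->blocks = blocks;
		),
		TP_printk("dev %d,%d log blocks %d",
			  MAJOR(__entry->dev), MINOR(__entry->dev),
			  __entry->blocks)
	);

	#endif /* _TRACE_MYFS_H */
	#include <trace/define_trace.h>

Each trace_myfs_log_blocks() call site then compiles to a near-zero-cost hook that can be enabled at runtime.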
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 80e4f5f898bb..9969ff062c5b 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #include <linux/completion.h> | 13 | #include <linux/completion.h> |
14 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
15 | #include <linux/gfs2_ondisk.h> | 15 | #include <linux/gfs2_ondisk.h> |
16 | #include <linux/bio.h> | ||
17 | #include <linux/fs.h> | ||
16 | 18 | ||
17 | #include "gfs2.h" | 19 | #include "gfs2.h" |
18 | #include "incore.h" | 20 | #include "incore.h" |
@@ -25,6 +27,7 @@ | |||
25 | #include "rgrp.h" | 27 | #include "rgrp.h" |
26 | #include "trans.h" | 28 | #include "trans.h" |
27 | #include "util.h" | 29 | #include "util.h" |
30 | #include "trace_gfs2.h" | ||
28 | 31 | ||
29 | /** | 32 | /** |
30 | * gfs2_pin - Pin a buffer in memory | 33 | * gfs2_pin - Pin a buffer in memory |
@@ -51,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) | |||
51 | if (bd->bd_ail) | 54 | if (bd->bd_ail) |
52 | list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); | 55 | list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); |
53 | get_bh(bh); | 56 | get_bh(bh); |
57 | trace_gfs2_pin(bd, 1); | ||
54 | } | 58 | } |
55 | 59 | ||
56 | /** | 60 | /** |
@@ -87,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
87 | bd->bd_ail = ai; | 91 | bd->bd_ail = ai; |
88 | list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); | 92 | list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); |
89 | clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); | 93 | clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); |
94 | trace_gfs2_pin(bd, 0); | ||
90 | gfs2_log_unlock(sdp); | 95 | gfs2_log_unlock(sdp); |
91 | unlock_buffer(bh); | 96 | unlock_buffer(bh); |
92 | } | 97 | } |
@@ -189,7 +194,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) | |||
189 | } | 194 | } |
190 | 195 | ||
191 | gfs2_log_unlock(sdp); | 196 | gfs2_log_unlock(sdp); |
192 | submit_bh(WRITE, bh); | 197 | submit_bh(WRITE_SYNC_PLUG, bh); |
193 | gfs2_log_lock(sdp); | 198 | gfs2_log_lock(sdp); |
194 | 199 | ||
195 | n = 0; | 200 | n = 0; |
@@ -199,7 +204,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) | |||
199 | gfs2_log_unlock(sdp); | 204 | gfs2_log_unlock(sdp); |
200 | lock_buffer(bd2->bd_bh); | 205 | lock_buffer(bd2->bd_bh); |
201 | bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); | 206 | bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); |
202 | submit_bh(WRITE, bh); | 207 | submit_bh(WRITE_SYNC_PLUG, bh); |
203 | gfs2_log_lock(sdp); | 208 | gfs2_log_lock(sdp); |
204 | if (++n >= num) | 209 | if (++n >= num) |
205 | break; | 210 | break; |
@@ -341,7 +346,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
341 | sdp->sd_log_num_revoke--; | 346 | sdp->sd_log_num_revoke--; |
342 | 347 | ||
343 | if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { | 348 | if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { |
344 | submit_bh(WRITE, bh); | 349 | submit_bh(WRITE_SYNC_PLUG, bh); |
345 | 350 | ||
346 | bh = gfs2_log_get_buf(sdp); | 351 | bh = gfs2_log_get_buf(sdp); |
347 | mh = (struct gfs2_meta_header *)bh->b_data; | 352 | mh = (struct gfs2_meta_header *)bh->b_data; |
@@ -358,7 +363,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
358 | } | 363 | } |
359 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); | 364 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); |
360 | 365 | ||
361 | submit_bh(WRITE, bh); | 366 | submit_bh(WRITE_SYNC_PLUG, bh); |
362 | } | 367 | } |
363 | 368 | ||
364 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, | 369 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, |
@@ -560,7 +565,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
560 | ptr = bh_log_ptr(bh); | 565 | ptr = bh_log_ptr(bh); |
561 | 566 | ||
562 | get_bh(bh); | 567 | get_bh(bh); |
563 | submit_bh(WRITE, bh); | 568 | submit_bh(WRITE_SYNC_PLUG, bh); |
564 | gfs2_log_lock(sdp); | 569 | gfs2_log_lock(sdp); |
565 | while(!list_empty(list)) { | 570 | while(!list_empty(list)) { |
566 | bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); | 571 | bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); |
@@ -586,7 +591,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
586 | } else { | 591 | } else { |
587 | bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); | 592 | bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); |
588 | } | 593 | } |
589 | submit_bh(WRITE, bh1); | 594 | submit_bh(WRITE_SYNC_PLUG, bh1); |
590 | gfs2_log_lock(sdp); | 595 | gfs2_log_lock(sdp); |
591 | ptr += 2; | 596 | ptr += 2; |
592 | } | 597 | } |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index a6892ed0840a..eacd78a5d082 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/gfs2_ondisk.h> | 16 | #include <linux/gfs2_ondisk.h> |
17 | #include <asm/atomic.h> | 17 | #include <asm/atomic.h> |
18 | #include <linux/slow-work.h> | ||
18 | 19 | ||
19 | #include "gfs2.h" | 20 | #include "gfs2.h" |
20 | #include "incore.h" | 21 | #include "incore.h" |
@@ -113,12 +114,18 @@ static int __init init_gfs2_fs(void) | |||
113 | if (error) | 114 | if (error) |
114 | goto fail_unregister; | 115 | goto fail_unregister; |
115 | 116 | ||
117 | error = slow_work_register_user(); | ||
118 | if (error) | ||
119 | goto fail_slow; | ||
120 | |||
116 | gfs2_register_debugfs(); | 121 | gfs2_register_debugfs(); |
117 | 122 | ||
118 | printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); | 123 | printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); |
119 | 124 | ||
120 | return 0; | 125 | return 0; |
121 | 126 | ||
127 | fail_slow: | ||
128 | unregister_filesystem(&gfs2meta_fs_type); | ||
122 | fail_unregister: | 129 | fail_unregister: |
123 | unregister_filesystem(&gfs2_fs_type); | 130 | unregister_filesystem(&gfs2_fs_type); |
124 | fail: | 131 | fail: |
@@ -156,6 +163,7 @@ static void __exit exit_gfs2_fs(void) | |||
156 | gfs2_unregister_debugfs(); | 163 | gfs2_unregister_debugfs(); |
157 | unregister_filesystem(&gfs2_fs_type); | 164 | unregister_filesystem(&gfs2_fs_type); |
158 | unregister_filesystem(&gfs2meta_fs_type); | 165 | unregister_filesystem(&gfs2meta_fs_type); |
166 | slow_work_unregister_user(); | ||
159 | 167 | ||
160 | kmem_cache_destroy(gfs2_quotad_cachep); | 168 | kmem_cache_destroy(gfs2_quotad_cachep); |
161 | kmem_cache_destroy(gfs2_rgrpd_cachep); | 169 | kmem_cache_destroy(gfs2_rgrpd_cachep); |
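The slow_work_register_user()/slow_work_unregister_user() pair above replaces the dedicated gfs2_recoverd kernel thread: journal recovery items are now queued to a shared slow-work thread pool. A sketch of the slow-work API of this era, with hypothetical names (GFS2's real ops table is gfs2_recover_ops, attached via slow_work_init() in ops_fstype.c below):

	#include <linux/slow-work.h>

	static int my_work_get_ref(struct slow_work *work)
	{
		/* pin the object containing the work item; 0 on success */
		return 0;
	}

	static void my_work_put_ref(struct slow_work *work)
	{
		/* drop the reference taken by get_ref */
	}

	static void my_work_execute(struct slow_work *work)
	{
		/* the long-running job, e.g. replaying one journal */
	}

	static const struct slow_work_ops my_work_ops = {
		.get_ref = my_work_get_ref,
		.put_ref = my_work_put_ref,
		.execute = my_work_execute,
	};

Once slow_work_register_user() has succeeded, an item is prepared with slow_work_init(&item, &my_work_ops) and run by calling slow_work_enqueue(&item).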
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 8d6f13256b26..cb8d7a93d5ec 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -31,19 +31,66 @@ | |||
31 | #include "rgrp.h" | 31 | #include "rgrp.h" |
32 | #include "trans.h" | 32 | #include "trans.h" |
33 | #include "util.h" | 33 | #include "util.h" |
34 | #include "ops_address.h" | ||
35 | 34 | ||
36 | static int aspace_get_block(struct inode *inode, sector_t lblock, | 35 | static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) |
37 | struct buffer_head *bh_result, int create) | ||
38 | { | 36 | { |
39 | gfs2_assert_warn(inode->i_sb->s_fs_info, 0); | 37 | int err; |
40 | return -EOPNOTSUPP; | 38 | struct buffer_head *bh, *head; |
41 | } | 39 | int nr_underway = 0; |
40 | int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? | ||
41 | WRITE_SYNC_PLUG : WRITE)); | ||
42 | |||
43 | BUG_ON(!PageLocked(page)); | ||
44 | BUG_ON(!page_has_buffers(page)); | ||
45 | |||
46 | head = page_buffers(page); | ||
47 | bh = head; | ||
48 | |||
49 | do { | ||
50 | if (!buffer_mapped(bh)) | ||
51 | continue; | ||
52 | /* | ||
53 | * If it's a fully non-blocking write attempt and we cannot | ||
54 | * lock the buffer then redirty the page. Note that this can | ||
55 | * potentially cause a busy-wait loop from pdflush and kswapd | ||
56 | * activity, but those code paths have their own higher-level | ||
57 | * throttling. | ||
58 | */ | ||
59 | if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { | ||
60 | lock_buffer(bh); | ||
61 | } else if (!trylock_buffer(bh)) { | ||
62 | redirty_page_for_writepage(wbc, page); | ||
63 | continue; | ||
64 | } | ||
65 | if (test_clear_buffer_dirty(bh)) { | ||
66 | mark_buffer_async_write(bh); | ||
67 | } else { | ||
68 | unlock_buffer(bh); | ||
69 | } | ||
70 | } while ((bh = bh->b_this_page) != head); | ||
71 | |||
72 | /* | ||
73 | * The page and its buffers are protected by PageWriteback(), so we can | ||
74 | * drop the bh refcounts early. | ||
75 | */ | ||
76 | BUG_ON(PageWriteback(page)); | ||
77 | set_page_writeback(page); | ||
78 | |||
79 | do { | ||
80 | struct buffer_head *next = bh->b_this_page; | ||
81 | if (buffer_async_write(bh)) { | ||
82 | submit_bh(write_op, bh); | ||
83 | nr_underway++; | ||
84 | } | ||
85 | bh = next; | ||
86 | } while (bh != head); | ||
87 | unlock_page(page); | ||
42 | 88 | ||
43 | static int gfs2_aspace_writepage(struct page *page, | 89 | err = 0; |
44 | struct writeback_control *wbc) | 90 | if (nr_underway == 0) |
45 | { | 91 | end_page_writeback(page); |
46 | return block_write_full_page(page, aspace_get_block, wbc); | 92 | |
93 | return err; | ||
47 | } | 94 | } |
48 | 95 | ||
49 | static const struct address_space_operations aspace_aops = { | 96 | static const struct address_space_operations aspace_aops = { |
@@ -201,16 +248,32 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) | |||
201 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, | 248 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, |
202 | struct buffer_head **bhp) | 249 | struct buffer_head **bhp) |
203 | { | 250 | { |
204 | *bhp = gfs2_getbuf(gl, blkno, CREATE); | 251 | struct gfs2_sbd *sdp = gl->gl_sbd; |
205 | if (!buffer_uptodate(*bhp)) { | 252 | struct buffer_head *bh; |
206 | ll_rw_block(READ_META, 1, bhp); | 253 | |
207 | if (flags & DIO_WAIT) { | 254 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
208 | int error = gfs2_meta_wait(gl->gl_sbd, *bhp); | 255 | return -EIO; |
209 | if (error) { | 256 | |
210 | brelse(*bhp); | 257 | *bhp = bh = gfs2_getbuf(gl, blkno, CREATE); |
211 | return error; | 258 | |
212 | } | 259 | lock_buffer(bh); |
213 | } | 260 | if (buffer_uptodate(bh)) { |
261 | unlock_buffer(bh); | ||
262 | return 0; | ||
263 | } | ||
264 | bh->b_end_io = end_buffer_read_sync; | ||
265 | get_bh(bh); | ||
266 | submit_bh(READ_SYNC | (1 << BIO_RW_META), bh); | ||
267 | if (!(flags & DIO_WAIT)) | ||
268 | return 0; | ||
269 | |||
270 | wait_on_buffer(bh); | ||
271 | if (unlikely(!buffer_uptodate(bh))) { | ||
272 | struct gfs2_trans *tr = current->journal_info; | ||
273 | if (tr && tr->tr_touched) | ||
274 | gfs2_io_error_bh(sdp, bh); | ||
275 | brelse(bh); | ||
276 | return -EIO; | ||
214 | } | 277 | } |
215 | 278 | ||
216 | return 0; | 279 | return 0; |
@@ -404,7 +467,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) | |||
404 | if (buffer_uptodate(first_bh)) | 467 | if (buffer_uptodate(first_bh)) |
405 | goto out; | 468 | goto out; |
406 | if (!buffer_locked(first_bh)) | 469 | if (!buffer_locked(first_bh)) |
407 | ll_rw_block(READ_META, 1, &first_bh); | 470 | ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh); |
408 | 471 | ||
409 | dblock++; | 472 | dblock++; |
410 | extlen--; | 473 | extlen--; |
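The rewritten gfs2_meta_read() above drops ll_rw_block() in favour of a direct submit_bh(), so the read can carry the READ_SYNC and BIO_RW_META hints and the shutdown check can happen first. The core idiom, sketched here with generic names, is a recheck of the uptodate state under the buffer lock followed by a prioritised read:

	/* Sketch: recheck uptodate under the buffer lock so a read that
	 * completed while we waited for the lock is not re-submitted. */
	static int read_meta_buffer(struct buffer_head *bh)
	{
		lock_buffer(bh);
		if (buffer_uptodate(bh)) {
			unlock_buffer(bh);
			return 0;
		}
		bh->b_end_io = end_buffer_read_sync;	/* unlocks the buffer */
		get_bh(bh);				/* ref dropped by b_end_io */
		submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
		wait_on_buffer(bh);
		return buffer_uptodate(bh) ? 0 : -EIO;
	}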
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c deleted file mode 100644 index f7e8527a21e0..000000000000 --- a/fs/gfs2/mount.c +++ /dev/null | |||
@@ -1,185 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #include <linux/slab.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/completion.h> | ||
13 | #include <linux/buffer_head.h> | ||
14 | #include <linux/gfs2_ondisk.h> | ||
15 | #include <linux/parser.h> | ||
16 | |||
17 | #include "gfs2.h" | ||
18 | #include "incore.h" | ||
19 | #include "super.h" | ||
20 | #include "sys.h" | ||
21 | #include "util.h" | ||
22 | |||
23 | enum { | ||
24 | Opt_lockproto, | ||
25 | Opt_locktable, | ||
26 | Opt_hostdata, | ||
27 | Opt_spectator, | ||
28 | Opt_ignore_local_fs, | ||
29 | Opt_localflocks, | ||
30 | Opt_localcaching, | ||
31 | Opt_debug, | ||
32 | Opt_nodebug, | ||
33 | Opt_upgrade, | ||
34 | Opt_acl, | ||
35 | Opt_noacl, | ||
36 | Opt_quota_off, | ||
37 | Opt_quota_account, | ||
38 | Opt_quota_on, | ||
39 | Opt_quota, | ||
40 | Opt_noquota, | ||
41 | Opt_suiddir, | ||
42 | Opt_nosuiddir, | ||
43 | Opt_data_writeback, | ||
44 | Opt_data_ordered, | ||
45 | Opt_meta, | ||
46 | Opt_discard, | ||
47 | Opt_nodiscard, | ||
48 | Opt_err, | ||
49 | }; | ||
50 | |||
51 | static const match_table_t tokens = { | ||
52 | {Opt_lockproto, "lockproto=%s"}, | ||
53 | {Opt_locktable, "locktable=%s"}, | ||
54 | {Opt_hostdata, "hostdata=%s"}, | ||
55 | {Opt_spectator, "spectator"}, | ||
56 | {Opt_ignore_local_fs, "ignore_local_fs"}, | ||
57 | {Opt_localflocks, "localflocks"}, | ||
58 | {Opt_localcaching, "localcaching"}, | ||
59 | {Opt_debug, "debug"}, | ||
60 | {Opt_nodebug, "nodebug"}, | ||
61 | {Opt_upgrade, "upgrade"}, | ||
62 | {Opt_acl, "acl"}, | ||
63 | {Opt_noacl, "noacl"}, | ||
64 | {Opt_quota_off, "quota=off"}, | ||
65 | {Opt_quota_account, "quota=account"}, | ||
66 | {Opt_quota_on, "quota=on"}, | ||
67 | {Opt_quota, "quota"}, | ||
68 | {Opt_noquota, "noquota"}, | ||
69 | {Opt_suiddir, "suiddir"}, | ||
70 | {Opt_nosuiddir, "nosuiddir"}, | ||
71 | {Opt_data_writeback, "data=writeback"}, | ||
72 | {Opt_data_ordered, "data=ordered"}, | ||
73 | {Opt_meta, "meta"}, | ||
74 | {Opt_discard, "discard"}, | ||
75 | {Opt_nodiscard, "nodiscard"}, | ||
76 | {Opt_err, NULL} | ||
77 | }; | ||
78 | |||
79 | /** | ||
80 | * gfs2_mount_args - Parse mount options | ||
81 | * @sdp: | ||
82 | * @data: | ||
83 | * | ||
84 | * Return: errno | ||
85 | */ | ||
86 | |||
87 | int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) | ||
88 | { | ||
89 | char *o; | ||
90 | int token; | ||
91 | substring_t tmp[MAX_OPT_ARGS]; | ||
92 | |||
93 | /* Split the options into tokens with the "," character and | ||
94 | process them */ | ||
95 | |||
96 | while (1) { | ||
97 | o = strsep(&options, ","); | ||
98 | if (o == NULL) | ||
99 | break; | ||
100 | if (*o == '\0') | ||
101 | continue; | ||
102 | |||
103 | token = match_token(o, tokens, tmp); | ||
104 | switch (token) { | ||
105 | case Opt_lockproto: | ||
106 | match_strlcpy(args->ar_lockproto, &tmp[0], | ||
107 | GFS2_LOCKNAME_LEN); | ||
108 | break; | ||
109 | case Opt_locktable: | ||
110 | match_strlcpy(args->ar_locktable, &tmp[0], | ||
111 | GFS2_LOCKNAME_LEN); | ||
112 | break; | ||
113 | case Opt_hostdata: | ||
114 | match_strlcpy(args->ar_hostdata, &tmp[0], | ||
115 | GFS2_LOCKNAME_LEN); | ||
116 | break; | ||
117 | case Opt_spectator: | ||
118 | args->ar_spectator = 1; | ||
119 | break; | ||
120 | case Opt_ignore_local_fs: | ||
121 | args->ar_ignore_local_fs = 1; | ||
122 | break; | ||
123 | case Opt_localflocks: | ||
124 | args->ar_localflocks = 1; | ||
125 | break; | ||
126 | case Opt_localcaching: | ||
127 | args->ar_localcaching = 1; | ||
128 | break; | ||
129 | case Opt_debug: | ||
130 | args->ar_debug = 1; | ||
131 | break; | ||
132 | case Opt_nodebug: | ||
133 | args->ar_debug = 0; | ||
134 | break; | ||
135 | case Opt_upgrade: | ||
136 | args->ar_upgrade = 1; | ||
137 | break; | ||
138 | case Opt_acl: | ||
139 | args->ar_posix_acl = 1; | ||
140 | break; | ||
141 | case Opt_noacl: | ||
142 | args->ar_posix_acl = 0; | ||
143 | break; | ||
144 | case Opt_quota_off: | ||
145 | case Opt_noquota: | ||
146 | args->ar_quota = GFS2_QUOTA_OFF; | ||
147 | break; | ||
148 | case Opt_quota_account: | ||
149 | args->ar_quota = GFS2_QUOTA_ACCOUNT; | ||
150 | break; | ||
151 | case Opt_quota_on: | ||
152 | case Opt_quota: | ||
153 | args->ar_quota = GFS2_QUOTA_ON; | ||
154 | break; | ||
155 | case Opt_suiddir: | ||
156 | args->ar_suiddir = 1; | ||
157 | break; | ||
158 | case Opt_nosuiddir: | ||
159 | args->ar_suiddir = 0; | ||
160 | break; | ||
161 | case Opt_data_writeback: | ||
162 | args->ar_data = GFS2_DATA_WRITEBACK; | ||
163 | break; | ||
164 | case Opt_data_ordered: | ||
165 | args->ar_data = GFS2_DATA_ORDERED; | ||
166 | break; | ||
167 | case Opt_meta: | ||
168 | args->ar_meta = 1; | ||
169 | break; | ||
170 | case Opt_discard: | ||
171 | args->ar_discard = 1; | ||
172 | break; | ||
173 | case Opt_nodiscard: | ||
174 | args->ar_discard = 0; | ||
175 | break; | ||
176 | case Opt_err: | ||
177 | default: | ||
178 | fs_info(sdp, "invalid mount option: %s\n", o); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | return 0; | ||
184 | } | ||
185 | |||
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h deleted file mode 100644 index 5da21285bba4..000000000000 --- a/fs/gfs2/ops_address.h +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #ifndef __OPS_ADDRESS_DOT_H__ | ||
11 | #define __OPS_ADDRESS_DOT_H__ | ||
12 | |||
13 | #include <linux/fs.h> | ||
14 | #include <linux/buffer_head.h> | ||
15 | #include <linux/mm.h> | ||
16 | |||
17 | extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | ||
18 | extern int gfs2_internal_read(struct gfs2_inode *ip, | ||
19 | struct file_ra_state *ra_state, | ||
20 | char *buf, loff_t *pos, unsigned size); | ||
21 | extern void gfs2_set_aops(struct inode *inode); | ||
22 | |||
23 | #endif /* __OPS_ADDRESS_DOT_H__ */ | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1ff9473ea753..7bc3c45cd676 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
18 | #include <linux/mount.h> | 18 | #include <linux/mount.h> |
19 | #include <linux/gfs2_ondisk.h> | 19 | #include <linux/gfs2_ondisk.h> |
20 | #include <linux/slow-work.h> | ||
20 | 21 | ||
21 | #include "gfs2.h" | 22 | #include "gfs2.h" |
22 | #include "incore.h" | 23 | #include "incore.h" |
@@ -32,6 +33,7 @@ | |||
32 | #include "log.h" | 33 | #include "log.h" |
33 | #include "quota.h" | 34 | #include "quota.h" |
34 | #include "dir.h" | 35 | #include "dir.h" |
36 | #include "trace_gfs2.h" | ||
35 | 37 | ||
36 | #define DO 0 | 38 | #define DO 0 |
37 | #define UNDO 1 | 39 | #define UNDO 1 |
@@ -55,8 +57,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) | |||
55 | spin_lock_init(>->gt_spin); | 57 | spin_lock_init(>->gt_spin); |
56 | 58 | ||
57 | gt->gt_incore_log_blocks = 1024; | 59 | gt->gt_incore_log_blocks = 1024; |
58 | gt->gt_log_flush_secs = 60; | ||
59 | gt->gt_recoverd_secs = 60; | ||
60 | gt->gt_logd_secs = 1; | 60 | gt->gt_logd_secs = 1; |
61 | gt->gt_quota_simul_sync = 64; | 61 | gt->gt_quota_simul_sync = 64; |
62 | gt->gt_quota_warn_period = 10; | 62 | gt->gt_quota_warn_period = 10; |
@@ -526,11 +526,11 @@ static int init_sb(struct gfs2_sbd *sdp, int silent) | |||
526 | } | 526 | } |
527 | 527 | ||
528 | /* Set up the buffer cache and SB for real */ | 528 | /* Set up the buffer cache and SB for real */ |
529 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { | 529 | if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) { |
530 | ret = -EINVAL; | 530 | ret = -EINVAL; |
531 | fs_err(sdp, "FS block size (%u) is too small for device " | 531 | fs_err(sdp, "FS block size (%u) is too small for device " |
532 | "block size (%u)\n", | 532 | "block size (%u)\n", |
533 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); | 533 | sdp->sd_sb.sb_bsize, bdev_logical_block_size(sb->s_bdev)); |
534 | goto out; | 534 | goto out; |
535 | } | 535 | } |
536 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { | 536 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { |
@@ -676,6 +676,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
676 | break; | 676 | break; |
677 | 677 | ||
678 | INIT_LIST_HEAD(&jd->extent_list); | 678 | INIT_LIST_HEAD(&jd->extent_list); |
679 | slow_work_init(&jd->jd_work, &gfs2_recover_ops); | ||
679 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); | 680 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); |
680 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { | 681 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { |
681 | if (!jd->jd_inode) | 682 | if (!jd->jd_inode) |
@@ -701,14 +702,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
701 | { | 702 | { |
702 | struct inode *master = sdp->sd_master_dir->d_inode; | 703 | struct inode *master = sdp->sd_master_dir->d_inode; |
703 | struct gfs2_holder ji_gh; | 704 | struct gfs2_holder ji_gh; |
704 | struct task_struct *p; | ||
705 | struct gfs2_inode *ip; | 705 | struct gfs2_inode *ip; |
706 | int jindex = 1; | 706 | int jindex = 1; |
707 | int error = 0; | 707 | int error = 0; |
708 | 708 | ||
709 | if (undo) { | 709 | if (undo) { |
710 | jindex = 0; | 710 | jindex = 0; |
711 | goto fail_recoverd; | 711 | goto fail_jinode_gh; |
712 | } | 712 | } |
713 | 713 | ||
714 | sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); | 714 | sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); |
@@ -776,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
776 | /* Map the extents for this journal's blocks */ | 776 | /* Map the extents for this journal's blocks */ |
777 | map_journal_extents(sdp); | 777 | map_journal_extents(sdp); |
778 | } | 778 | } |
779 | trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); | ||
779 | 780 | ||
780 | if (sdp->sd_lockstruct.ls_first) { | 781 | if (sdp->sd_lockstruct.ls_first) { |
781 | unsigned int x; | 782 | unsigned int x; |
@@ -801,18 +802,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
801 | gfs2_glock_dq_uninit(&ji_gh); | 802 | gfs2_glock_dq_uninit(&ji_gh); |
802 | jindex = 0; | 803 | jindex = 0; |
803 | 804 | ||
804 | p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd"); | ||
805 | error = IS_ERR(p); | ||
806 | if (error) { | ||
807 | fs_err(sdp, "can't start recoverd thread: %d\n", error); | ||
808 | goto fail_jinode_gh; | ||
809 | } | ||
810 | sdp->sd_recoverd_process = p; | ||
811 | |||
812 | return 0; | 805 | return 0; |
813 | 806 | ||
814 | fail_recoverd: | ||
815 | kthread_stop(sdp->sd_recoverd_process); | ||
816 | fail_jinode_gh: | 807 | fail_jinode_gh: |
817 | if (!sdp->sd_args.ar_spectator) | 808 | if (!sdp->sd_args.ar_spectator) |
818 | gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); | 809 | gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); |
@@ -1165,6 +1156,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
1165 | 1156 | ||
1166 | sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; | 1157 | sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; |
1167 | sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; | 1158 | sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; |
1159 | sdp->sd_args.ar_commit = 60; | ||
1168 | 1160 | ||
1169 | error = gfs2_mount_args(sdp, &sdp->sd_args, data); | 1161 | error = gfs2_mount_args(sdp, &sdp->sd_args, data); |
1170 | if (error) { | 1162 | if (error) { |
@@ -1172,8 +1164,10 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
1172 | goto fail; | 1164 | goto fail; |
1173 | } | 1165 | } |
1174 | 1166 | ||
1175 | if (sdp->sd_args.ar_spectator) | 1167 | if (sdp->sd_args.ar_spectator) { |
1176 | sb->s_flags |= MS_RDONLY; | 1168 | sb->s_flags |= MS_RDONLY; |
1169 | set_bit(SDF_NORECOVERY, &sdp->sd_flags); | ||
1170 | } | ||
1177 | if (sdp->sd_args.ar_posix_acl) | 1171 | if (sdp->sd_args.ar_posix_acl) |
1178 | sb->s_flags |= MS_POSIXACL; | 1172 | sb->s_flags |= MS_POSIXACL; |
1179 | 1173 | ||
@@ -1191,6 +1185,8 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
1191 | GFS2_BASIC_BLOCK_SHIFT; | 1185 | GFS2_BASIC_BLOCK_SHIFT; |
1192 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | 1186 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; |
1193 | 1187 | ||
1188 | sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; | ||
1189 | |||
1194 | error = init_names(sdp, silent); | 1190 | error = init_names(sdp, silent); |
1195 | if (error) | 1191 | if (error) |
1196 | goto fail; | 1192 | goto fail; |
@@ -1279,9 +1275,22 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags, | |||
1279 | return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); | 1275 | return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); |
1280 | } | 1276 | } |
1281 | 1277 | ||
1282 | static struct super_block *get_gfs2_sb(const char *dev_name) | 1278 | static int test_meta_super(struct super_block *s, void *ptr) |
1279 | { | ||
1280 | struct block_device *bdev = ptr; | ||
1281 | return (bdev == s->s_bdev); | ||
1282 | } | ||
1283 | |||
1284 | static int set_meta_super(struct super_block *s, void *ptr) | ||
1283 | { | 1285 | { |
1284 | struct super_block *sb; | 1286 | return -EINVAL; |
1287 | } | ||
1288 | |||
1289 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | ||
1290 | const char *dev_name, void *data, struct vfsmount *mnt) | ||
1291 | { | ||
1292 | struct super_block *s; | ||
1293 | struct gfs2_sbd *sdp; | ||
1285 | struct path path; | 1294 | struct path path; |
1286 | int error; | 1295 | int error; |
1287 | 1296 | ||
@@ -1289,30 +1298,17 @@ static struct super_block *get_gfs2_sb(const char *dev_name) | |||
1289 | if (error) { | 1298 | if (error) { |
1290 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", | 1299 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", |
1291 | dev_name, error); | 1300 | dev_name, error); |
1292 | return NULL; | 1301 | return error; |
1293 | } | 1302 | } |
1294 | sb = path.dentry->d_inode->i_sb; | 1303 | s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, |
1295 | if (sb && (sb->s_type == &gfs2_fs_type)) | 1304 | path.dentry->d_inode->i_sb->s_bdev); |
1296 | atomic_inc(&sb->s_active); | ||
1297 | else | ||
1298 | sb = NULL; | ||
1299 | path_put(&path); | 1305 | path_put(&path); |
1300 | return sb; | 1306 | if (IS_ERR(s)) { |
1301 | } | ||
1302 | |||
1303 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | ||
1304 | const char *dev_name, void *data, struct vfsmount *mnt) | ||
1305 | { | ||
1306 | struct super_block *sb = NULL; | ||
1307 | struct gfs2_sbd *sdp; | ||
1308 | |||
1309 | sb = get_gfs2_sb(dev_name); | ||
1310 | if (!sb) { | ||
1311 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | 1307 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); |
1312 | return -ENOENT; | 1308 | return PTR_ERR(s); |
1313 | } | 1309 | } |
1314 | sdp = sb->s_fs_info; | 1310 | sdp = s->s_fs_info; |
1315 | mnt->mnt_sb = sb; | 1311 | mnt->mnt_sb = s; |
1316 | mnt->mnt_root = dget(sdp->sd_master_dir); | 1312 | mnt->mnt_root = dget(sdp->sd_master_dir); |
1317 | return 0; | 1313 | return 0; |
1318 | } | 1314 | } |
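The meta-mount rewrite above relies on an sget() idiom worth spelling out: the test callback matches an existing superblock by block device, while the set callback unconditionally returns -EINVAL, so sget() can only ever return a superblock that a prior gfs2 mount created; it can never instantiate a new one. A sketch under those assumptions (callback and helper names invented):

	static int test_sb_by_bdev(struct super_block *s, void *ptr)
	{
		return s->s_bdev == (struct block_device *)ptr;
	}

	static int set_sb_refuse(struct super_block *s, void *ptr)
	{
		return -EINVAL;		/* lookup only, never create */
	}

	static struct super_block *lookup_meta_sb(struct block_device *bdev)
	{
		/* a valid pointer is the live superblock; ERR_PTR(-EINVAL)
		 * propagated from set_sb_refuse() means no gfs2 mount yet */
		return sget(&gfs2_fs_type, test_sb_by_bdev, set_sb_refuse,
			    bdev);
	}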
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1c70fa5168d6..f8bd20baf99c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -262,6 +262,44 @@ out_parent: | |||
262 | return error; | 262 | return error; |
263 | } | 263 | } |
264 | 264 | ||
265 | /* | ||
266 | * gfs2_unlink_ok - check to see that an inode is still in a directory | ||
267 | * @dip: the directory | ||
268 | * @name: the name of the file | ||
269 | * @ip: the inode | ||
270 | * | ||
271 | * Assumes that the lock on (at least) @dip is held. | ||
272 | * | ||
273 | * Returns: 0 if the parent/child relationship is correct, errno if it isn't | ||
274 | */ | ||
275 | |||
276 | static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | ||
277 | const struct gfs2_inode *ip) | ||
278 | { | ||
279 | int error; | ||
280 | |||
281 | if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) | ||
282 | return -EPERM; | ||
283 | |||
284 | if ((dip->i_inode.i_mode & S_ISVTX) && | ||
285 | dip->i_inode.i_uid != current_fsuid() && | ||
286 | ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) | ||
287 | return -EPERM; | ||
288 | |||
289 | if (IS_APPEND(&dip->i_inode)) | ||
290 | return -EPERM; | ||
291 | |||
292 | error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); | ||
293 | if (error) | ||
294 | return error; | ||
295 | |||
296 | error = gfs2_dir_check(&dip->i_inode, name, ip); | ||
297 | if (error) | ||
298 | return error; | ||
299 | |||
300 | return 0; | ||
301 | } | ||
302 | |||
265 | /** | 303 | /** |
266 | * gfs2_unlink - Unlink a file | 304 | * gfs2_unlink - Unlink a file |
267 | * @dir: The inode of the directory containing the file to unlink | 305 | * @dir: The inode of the directory containing the file to unlink |
@@ -473,6 +511,59 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
473 | } | 511 | } |
474 | 512 | ||
475 | /** | 513 | /** |
514 | * gfs2_rmdiri - Remove a directory | ||
515 | * @dip: The parent directory of the directory to be removed | ||
516 | * @name: The name of the directory to be removed | ||
517 | * @ip: The GFS2 inode of the directory to be removed | ||
518 | * | ||
519 | * Assumes Glocks on dip and ip are held | ||
520 | * | ||
521 | * Returns: errno | ||
522 | */ | ||
523 | |||
524 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | ||
525 | struct gfs2_inode *ip) | ||
526 | { | ||
527 | struct qstr dotname; | ||
528 | int error; | ||
529 | |||
530 | if (ip->i_entries != 2) { | ||
531 | if (gfs2_consist_inode(ip)) | ||
532 | gfs2_dinode_print(ip); | ||
533 | return -EIO; | ||
534 | } | ||
535 | |||
536 | error = gfs2_dir_del(dip, name); | ||
537 | if (error) | ||
538 | return error; | ||
539 | |||
540 | error = gfs2_change_nlink(dip, -1); | ||
541 | if (error) | ||
542 | return error; | ||
543 | |||
544 | gfs2_str2qstr(&dotname, "."); | ||
545 | error = gfs2_dir_del(ip, &dotname); | ||
546 | if (error) | ||
547 | return error; | ||
548 | |||
549 | gfs2_str2qstr(&dotname, ".."); | ||
550 | error = gfs2_dir_del(ip, &dotname); | ||
551 | if (error) | ||
552 | return error; | ||
553 | |||
554 | /* It looks odd, but it really should be done twice */ | ||
555 | error = gfs2_change_nlink(ip, -1); | ||
556 | if (error) | ||
557 | return error; | ||
558 | |||
559 | error = gfs2_change_nlink(ip, -1); | ||
560 | if (error) | ||
561 | return error; | ||
562 | |||
563 | return error; | ||
564 | } | ||
565 | |||
566 | /** | ||
476 | * gfs2_rmdir - Remove a directory | 567 | * gfs2_rmdir - Remove a directory |
477 | * @dir: The parent directory of the directory to be removed | 568 | * @dir: The parent directory of the directory to be removed |
478 | * @dentry: The dentry of the directory to remove | 569 | * @dentry: The dentry of the directory to remove |
@@ -885,6 +976,61 @@ out: | |||
885 | } | 976 | } |
886 | 977 | ||
887 | /** | 978 | /** |
979 | * gfs2_readlinki - return the contents of a symlink | ||
980 | * @ip: the symlink's inode | ||
981 | * @buf: a pointer to the buffer to be filled | ||
982 | * @len: a pointer to the length of @buf | ||
983 | * | ||
984 | * If @buf is too small, a piece of memory is kmalloc()ed and needs | ||
985 | * to be freed by the caller. | ||
986 | * | ||
987 | * Returns: errno | ||
988 | */ | ||
989 | |||
990 | static int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | ||
991 | { | ||
992 | struct gfs2_holder i_gh; | ||
993 | struct buffer_head *dibh; | ||
994 | unsigned int x; | ||
995 | int error; | ||
996 | |||
997 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); | ||
998 | error = gfs2_glock_nq(&i_gh); | ||
999 | if (error) { | ||
1000 | gfs2_holder_uninit(&i_gh); | ||
1001 | return error; | ||
1002 | } | ||
1003 | |||
1004 | if (!ip->i_disksize) { | ||
1005 | gfs2_consist_inode(ip); | ||
1006 | error = -EIO; | ||
1007 | goto out; | ||
1008 | } | ||
1009 | |||
1010 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1011 | if (error) | ||
1012 | goto out; | ||
1013 | |||
1014 | x = ip->i_disksize + 1; | ||
1015 | if (x > *len) { | ||
1016 | *buf = kmalloc(x, GFP_NOFS); | ||
1017 | if (!*buf) { | ||
1018 | error = -ENOMEM; | ||
1019 | goto out_brelse; | ||
1020 | } | ||
1021 | } | ||
1022 | |||
1023 | memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); | ||
1024 | *len = x; | ||
1025 | |||
1026 | out_brelse: | ||
1027 | brelse(dibh); | ||
1028 | out: | ||
1029 | gfs2_glock_dq_uninit(&i_gh); | ||
1030 | return error; | ||
1031 | } | ||
1032 | |||
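A minimal caller sketch for the buffer-replacement contract, assuming a hypothetical example_readlink() wrapper (the function name and buffer size are invented for illustration):

    static int example_readlink(struct gfs2_inode *ip)
    {
            char array[64];         /* small on-stack first attempt */
            char *buf = array;
            unsigned int len = sizeof(array);
            int error;

            error = gfs2_readlinki(ip, &buf, &len);
            if (error)
                    return error;
            /* ... use the len bytes at buf (NUL-terminated target) ... */
            if (buf != array)       /* gfs2_readlinki() kmalloc()ed a replacement */
                    kfree(buf);
            return 0;
    }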
1033 | /** | ||
888 | * gfs2_readlink - Read the value of a symlink | 1034 | * gfs2_readlink - Read the value of a symlink |
889 | * @dentry: the symlink | 1035 | * @dentry: the symlink |
890 | * @buf: the buffer to read the symlink data into | 1036 | * @buf: the buffer to read the symlink data into |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c deleted file mode 100644 index 458019569dcb..000000000000 --- a/fs/gfs2/ops_super.c +++ /dev/null | |||
@@ -1,723 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #include <linux/sched.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/spinlock.h> | ||
13 | #include <linux/completion.h> | ||
14 | #include <linux/buffer_head.h> | ||
15 | #include <linux/statfs.h> | ||
16 | #include <linux/seq_file.h> | ||
17 | #include <linux/mount.h> | ||
18 | #include <linux/kthread.h> | ||
19 | #include <linux/delay.h> | ||
20 | #include <linux/gfs2_ondisk.h> | ||
21 | #include <linux/crc32.h> | ||
22 | #include <linux/time.h> | ||
23 | |||
24 | #include "gfs2.h" | ||
25 | #include "incore.h" | ||
26 | #include "glock.h" | ||
27 | #include "inode.h" | ||
28 | #include "log.h" | ||
29 | #include "quota.h" | ||
30 | #include "recovery.h" | ||
31 | #include "rgrp.h" | ||
32 | #include "super.h" | ||
33 | #include "sys.h" | ||
34 | #include "util.h" | ||
35 | #include "trans.h" | ||
36 | #include "dir.h" | ||
37 | #include "eattr.h" | ||
38 | #include "bmap.h" | ||
39 | #include "meta_io.h" | ||
40 | |||
41 | #define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) | ||
42 | |||
43 | /** | ||
44 | * gfs2_write_inode - Make sure the inode is stable on the disk | ||
45 | * @inode: The inode | ||
46 | * @sync: synchronous write flag | ||
47 | * | ||
48 | * Returns: errno | ||
49 | */ | ||
50 | |||
51 | static int gfs2_write_inode(struct inode *inode, int sync) | ||
52 | { | ||
53 | struct gfs2_inode *ip = GFS2_I(inode); | ||
54 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
55 | struct gfs2_holder gh; | ||
56 | struct buffer_head *bh; | ||
57 | struct timespec atime; | ||
58 | struct gfs2_dinode *di; | ||
59 | int ret = 0; | ||
60 | |||
61 | /* Check this is a "normal" inode, etc */ | ||
62 | if (!test_bit(GIF_USER, &ip->i_flags) || | ||
63 | (current->flags & PF_MEMALLOC)) | ||
64 | return 0; | ||
65 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
66 | if (ret) | ||
67 | goto do_flush; | ||
68 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
69 | if (ret) | ||
70 | goto do_unlock; | ||
71 | ret = gfs2_meta_inode_buffer(ip, &bh); | ||
72 | if (ret == 0) { | ||
73 | di = (struct gfs2_dinode *)bh->b_data; | ||
74 | atime.tv_sec = be64_to_cpu(di->di_atime); | ||
75 | atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); | ||
76 | if (timespec_compare(&inode->i_atime, &atime) > 0) { | ||
77 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
78 | gfs2_dinode_out(ip, bh->b_data); | ||
79 | } | ||
80 | brelse(bh); | ||
81 | } | ||
82 | gfs2_trans_end(sdp); | ||
83 | do_unlock: | ||
84 | gfs2_glock_dq_uninit(&gh); | ||
85 | do_flush: | ||
86 | if (sync != 0) | ||
87 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
88 | return ret; | ||
89 | } | ||
90 | |||
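The only on-disk field this call actually refreshes is atime, and only when the in-core value is newer than the dinode's copy; a condensed view of the decision (illustrative comment, not code from the patch):

    /* atime writeback decision in gfs2_write_inode():
     *   disk atime = { di->di_atime, di->di_atime_nsec }  (big-endian on disk)
     *   if (timespec_compare(&inode->i_atime, &disk_atime) > 0)
     *           add bh to the transaction and rewrite the dinode;
     *   else
     *           leave the buffer untouched (the transaction is a no-op)
     */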
91 | /** | ||
92 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
93 | * @sdp: the filesystem | ||
94 | * | ||
95 | * Returns: errno | ||
96 | */ | ||
97 | |||
98 | static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
99 | { | ||
100 | struct gfs2_holder t_gh; | ||
101 | int error; | ||
102 | |||
103 | gfs2_quota_sync(sdp); | ||
104 | gfs2_statfs_sync(sdp); | ||
105 | |||
106 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
107 | &t_gh); | ||
108 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
109 | return error; | ||
110 | |||
111 | gfs2_meta_syncfs(sdp); | ||
112 | gfs2_log_shutdown(sdp); | ||
113 | |||
114 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
115 | |||
116 | if (t_gh.gh_gl) | ||
117 | gfs2_glock_dq_uninit(&t_gh); | ||
118 | |||
119 | gfs2_quota_cleanup(sdp); | ||
120 | |||
121 | return error; | ||
122 | } | ||
123 | |||
124 | /** | ||
125 | * gfs2_put_super - Unmount the filesystem | ||
126 | * @sb: The VFS superblock | ||
127 | * | ||
128 | */ | ||
129 | |||
130 | static void gfs2_put_super(struct super_block *sb) | ||
131 | { | ||
132 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
133 | int error; | ||
134 | |||
135 | /* Unfreeze the filesystem, if we need to */ | ||
136 | |||
137 | mutex_lock(&sdp->sd_freeze_lock); | ||
138 | if (sdp->sd_freeze_count) | ||
139 | gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); | ||
140 | mutex_unlock(&sdp->sd_freeze_lock); | ||
141 | |||
142 | kthread_stop(sdp->sd_quotad_process); | ||
143 | kthread_stop(sdp->sd_logd_process); | ||
144 | kthread_stop(sdp->sd_recoverd_process); | ||
145 | |||
146 | if (!(sb->s_flags & MS_RDONLY)) { | ||
147 | error = gfs2_make_fs_ro(sdp); | ||
148 | if (error) | ||
149 | gfs2_io_error(sdp); | ||
150 | } | ||
151 | /* At this point, we're through modifying the disk */ | ||
152 | |||
153 | /* Release stuff */ | ||
154 | |||
155 | iput(sdp->sd_jindex); | ||
156 | iput(sdp->sd_inum_inode); | ||
157 | iput(sdp->sd_statfs_inode); | ||
158 | iput(sdp->sd_rindex); | ||
159 | iput(sdp->sd_quota_inode); | ||
160 | |||
161 | gfs2_glock_put(sdp->sd_rename_gl); | ||
162 | gfs2_glock_put(sdp->sd_trans_gl); | ||
163 | |||
164 | if (!sdp->sd_args.ar_spectator) { | ||
165 | gfs2_glock_dq_uninit(&sdp->sd_journal_gh); | ||
166 | gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); | ||
167 | gfs2_glock_dq_uninit(&sdp->sd_ir_gh); | ||
168 | gfs2_glock_dq_uninit(&sdp->sd_sc_gh); | ||
169 | gfs2_glock_dq_uninit(&sdp->sd_qc_gh); | ||
170 | iput(sdp->sd_ir_inode); | ||
171 | iput(sdp->sd_sc_inode); | ||
172 | iput(sdp->sd_qc_inode); | ||
173 | } | ||
174 | |||
175 | gfs2_glock_dq_uninit(&sdp->sd_live_gh); | ||
176 | gfs2_clear_rgrpd(sdp); | ||
177 | gfs2_jindex_free(sdp); | ||
178 | /* Take apart glock structures and buffer lists */ | ||
179 | gfs2_gl_hash_clear(sdp); | ||
180 | /* Unmount the locking protocol */ | ||
181 | gfs2_lm_unmount(sdp); | ||
182 | |||
183 | /* At this point, we're through participating in the lockspace */ | ||
184 | gfs2_sys_fs_del(sdp); | ||
185 | } | ||
186 | |||
187 | /** | ||
188 | * gfs2_write_super | ||
189 | * @sb: the superblock | ||
190 | * | ||
191 | */ | ||
192 | |||
193 | static void gfs2_write_super(struct super_block *sb) | ||
194 | { | ||
195 | sb->s_dirt = 0; | ||
196 | } | ||
197 | |||
198 | /** | ||
199 | * gfs2_sync_fs - sync the filesystem | ||
200 | * @sb: the superblock | ||
201 | * | ||
202 | * Flushes the log to disk. | ||
203 | */ | ||
204 | |||
205 | static int gfs2_sync_fs(struct super_block *sb, int wait) | ||
206 | { | ||
207 | sb->s_dirt = 0; | ||
208 | if (wait && sb->s_fs_info) | ||
209 | gfs2_log_flush(sb->s_fs_info, NULL); | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | /** | ||
214 | * gfs2_freeze - prevent further writes to the filesystem | ||
215 | * @sb: the VFS structure for the filesystem | ||
216 | * | ||
217 | */ | ||
218 | |||
219 | static int gfs2_freeze(struct super_block *sb) | ||
220 | { | ||
221 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
222 | int error; | ||
223 | |||
224 | if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
225 | return -EINVAL; | ||
226 | |||
227 | for (;;) { | ||
228 | error = gfs2_freeze_fs(sdp); | ||
229 | if (!error) | ||
230 | break; | ||
231 | |||
232 | switch (error) { | ||
233 | case -EBUSY: | ||
234 | fs_err(sdp, "waiting for recovery before freeze\n"); | ||
235 | break; | ||
236 | |||
237 | default: | ||
238 | fs_err(sdp, "error freezing FS: %d\n", error); | ||
239 | break; | ||
240 | } | ||
241 | |||
242 | fs_err(sdp, "retrying...\n"); | ||
243 | msleep(1000); | ||
244 | } | ||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * gfs2_unfreeze - reallow writes to the filesystem | ||
250 | * @sb: the VFS structure for the filesystem | ||
251 | * | ||
252 | */ | ||
253 | |||
254 | static int gfs2_unfreeze(struct super_block *sb) | ||
255 | { | ||
256 | gfs2_unfreeze_fs(sb->s_fs_info); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | /** | ||
261 | * statfs_slow_fill - fill in the sc for a given RG | ||
262 | * @rgd: the RG | ||
263 | * @sc: the sc structure | ||
264 | * | ||
265 | * Returns: 0 on success, -ESTALE if the LVB is invalid | ||
266 | */ | ||
267 | |||
268 | static int statfs_slow_fill(struct gfs2_rgrpd *rgd, | ||
269 | struct gfs2_statfs_change_host *sc) | ||
270 | { | ||
271 | gfs2_rgrp_verify(rgd); | ||
272 | sc->sc_total += rgd->rd_data; | ||
273 | sc->sc_free += rgd->rd_free; | ||
274 | sc->sc_dinodes += rgd->rd_dinodes; | ||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | /** | ||
279 | * gfs2_statfs_slow - Stat a filesystem using asynchronous locking | ||
280 | * @sdp: the filesystem | ||
281 | * @sc: the sc info that will be returned | ||
282 | * | ||
283 | * Any error (other than a signal) will cause this routine to fall back | ||
284 | * to the synchronous version. | ||
285 | * | ||
286 | * FIXME: This really shouldn't busy wait like this. | ||
287 | * | ||
288 | * Returns: errno | ||
289 | */ | ||
290 | |||
291 | static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) | ||
292 | { | ||
293 | struct gfs2_holder ri_gh; | ||
294 | struct gfs2_rgrpd *rgd_next; | ||
295 | struct gfs2_holder *gha, *gh; | ||
296 | unsigned int slots = 64; | ||
297 | unsigned int x; | ||
298 | int done; | ||
299 | int error = 0, err; | ||
300 | |||
301 | memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); | ||
302 | gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); | ||
303 | if (!gha) | ||
304 | return -ENOMEM; | ||
305 | |||
306 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
307 | if (error) | ||
308 | goto out; | ||
309 | |||
310 | rgd_next = gfs2_rgrpd_get_first(sdp); | ||
311 | |||
312 | for (;;) { | ||
313 | done = 1; | ||
314 | |||
315 | for (x = 0; x < slots; x++) { | ||
316 | gh = gha + x; | ||
317 | |||
318 | if (gh->gh_gl && gfs2_glock_poll(gh)) { | ||
319 | err = gfs2_glock_wait(gh); | ||
320 | if (err) { | ||
321 | gfs2_holder_uninit(gh); | ||
322 | error = err; | ||
323 | } else { | ||
324 | if (!error) | ||
325 | error = statfs_slow_fill( | ||
326 | gh->gh_gl->gl_object, sc); | ||
327 | gfs2_glock_dq_uninit(gh); | ||
328 | } | ||
329 | } | ||
330 | |||
331 | if (gh->gh_gl) | ||
332 | done = 0; | ||
333 | else if (rgd_next && !error) { | ||
334 | error = gfs2_glock_nq_init(rgd_next->rd_gl, | ||
335 | LM_ST_SHARED, | ||
336 | GL_ASYNC, | ||
337 | gh); | ||
338 | rgd_next = gfs2_rgrpd_get_next(rgd_next); | ||
339 | done = 0; | ||
340 | } | ||
341 | |||
342 | if (signal_pending(current)) | ||
343 | error = -ERESTARTSYS; | ||
344 | } | ||
345 | |||
346 | if (done) | ||
347 | break; | ||
348 | |||
349 | yield(); | ||
350 | } | ||
351 | |||
352 | gfs2_glock_dq_uninit(&ri_gh); | ||
353 | |||
354 | out: | ||
355 | kfree(gha); | ||
356 | return error; | ||
357 | } | ||
358 | |||
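The windowed asynchronous locking pattern above, reduced to a skeleton (illustrative; the real loop also folds in error and signal handling):

    /* 64-slot pipeline over all resource groups:
     *   for each slot gh:
     *       if gh is armed and gfs2_glock_poll(gh) says the grant arrived:
     *               gfs2_glock_wait(gh); statfs_slow_fill(...); release gh;
     *       else if gh is idle and rgrps remain:
     *               gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, GL_ASYNC, gh);
     *               advance to the next rgrp;
     *   repeat, with yield() between passes, until every slot is idle
     */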
359 | /** | ||
360 | * gfs2_statfs_i - Do a statfs | ||
361 | * @sdp: the filesystem | ||
362 | * @sc: the sc structure to be filled in | ||
363 | * | ||
364 | * Returns: errno | ||
365 | */ | ||
366 | |||
367 | static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) | ||
368 | { | ||
369 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; | ||
370 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; | ||
371 | |||
372 | spin_lock(&sdp->sd_statfs_spin); | ||
373 | |||
374 | *sc = *m_sc; | ||
375 | sc->sc_total += l_sc->sc_total; | ||
376 | sc->sc_free += l_sc->sc_free; | ||
377 | sc->sc_dinodes += l_sc->sc_dinodes; | ||
378 | |||
379 | spin_unlock(&sdp->sd_statfs_spin); | ||
380 | |||
381 | if (sc->sc_free < 0) | ||
382 | sc->sc_free = 0; | ||
383 | if (sc->sc_free > sc->sc_total) | ||
384 | sc->sc_free = sc->sc_total; | ||
385 | if (sc->sc_dinodes < 0) | ||
386 | sc->sc_dinodes = 0; | ||
387 | |||
388 | return 0; | ||
389 | } | ||
390 | |||
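Worked example with made-up numbers: if the master copy holds {total 1000, free 400, dinodes 50} and this node's not-yet-synced local delta is {free -25, dinodes +3}, the values handed back are {total 1000, free 375, dinodes 53}. The clamps afterwards only matter when the local delta transiently drives free or dinodes negative, or free above total.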
391 | /** | ||
392 | * gfs2_statfs - Gather and return stats about the filesystem | ||
393 | * @dentry: The dentry to stat | ||
394 | * @buf: The kstatfs buffer to fill in | ||
395 | * | ||
396 | * Returns: 0 on success or error code | ||
397 | */ | ||
398 | |||
399 | static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
400 | { | ||
401 | struct super_block *sb = dentry->d_inode->i_sb; | ||
402 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
403 | struct gfs2_statfs_change_host sc; | ||
404 | int error; | ||
405 | |||
406 | if (gfs2_tune_get(sdp, gt_statfs_slow)) | ||
407 | error = gfs2_statfs_slow(sdp, &sc); | ||
408 | else | ||
409 | error = gfs2_statfs_i(sdp, &sc); | ||
410 | |||
411 | if (error) | ||
412 | return error; | ||
413 | |||
414 | buf->f_type = GFS2_MAGIC; | ||
415 | buf->f_bsize = sdp->sd_sb.sb_bsize; | ||
416 | buf->f_blocks = sc.sc_total; | ||
417 | buf->f_bfree = sc.sc_free; | ||
418 | buf->f_bavail = sc.sc_free; | ||
419 | buf->f_files = sc.sc_dinodes + sc.sc_free; | ||
420 | buf->f_ffree = sc.sc_free; | ||
421 | buf->f_namelen = GFS2_FNAMESIZE; | ||
422 | |||
423 | return 0; | ||
424 | } | ||
425 | |||
426 | /** | ||
427 | * gfs2_remount_fs - called when the FS is remounted | ||
428 | * @sb: the filesystem | ||
429 | * @flags: the remount flags | ||
430 | * @data: extra data passed in (not used right now) | ||
431 | * | ||
432 | * Returns: errno | ||
433 | */ | ||
434 | |||
435 | static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | ||
436 | { | ||
437 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
438 | struct gfs2_args args = sdp->sd_args; /* Default to current settings */ | ||
439 | int error; | ||
440 | |||
441 | error = gfs2_mount_args(sdp, &args, data); | ||
442 | if (error) | ||
443 | return error; | ||
444 | |||
445 | /* Not allowed to change locking details */ | ||
446 | if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) || | ||
447 | strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) || | ||
448 | strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata)) | ||
449 | return -EINVAL; | ||
450 | |||
451 | /* Some flags must not be changed */ | ||
452 | if (args_neq(&args, &sdp->sd_args, spectator) || | ||
453 | args_neq(&args, &sdp->sd_args, ignore_local_fs) || | ||
454 | args_neq(&args, &sdp->sd_args, localflocks) || | ||
455 | args_neq(&args, &sdp->sd_args, localcaching) || | ||
456 | args_neq(&args, &sdp->sd_args, meta)) | ||
457 | return -EINVAL; | ||
458 | |||
459 | if (sdp->sd_args.ar_spectator) | ||
460 | *flags |= MS_RDONLY; | ||
461 | |||
462 | if ((sb->s_flags ^ *flags) & MS_RDONLY) { | ||
463 | if (*flags & MS_RDONLY) | ||
464 | error = gfs2_make_fs_ro(sdp); | ||
465 | else | ||
466 | error = gfs2_make_fs_rw(sdp); | ||
467 | if (error) | ||
468 | return error; | ||
469 | } | ||
470 | |||
471 | sdp->sd_args = args; | ||
472 | if (sdp->sd_args.ar_posix_acl) | ||
473 | sb->s_flags |= MS_POSIXACL; | ||
474 | else | ||
475 | sb->s_flags &= ~MS_POSIXACL; | ||
476 | return 0; | ||
477 | } | ||
478 | |||
479 | /** | ||
480 | * gfs2_drop_inode - Drop an inode (test for remote unlink) | ||
481 | * @inode: The inode to drop | ||
482 | * | ||
483 | * If we've received a callback on an iopen lock then it's because a | ||
484 | * remote node tried to deallocate the inode but failed due to this node | ||
485 | * still having the inode open. Here we mark the link count zero | ||
486 | * since we know that it must have reached zero if the GLF_DEMOTE flag | ||
487 | * is set on the iopen glock. If we didn't do a disk read since the | ||
488 | * remote node removed the final link then we might otherwise miss | ||
489 | * this event. This check ensures that this node will deallocate the | ||
490 | * inode's blocks, or alternatively pass the baton on to another | ||
491 | * node for later deallocation. | ||
492 | */ | ||
493 | |||
494 | static void gfs2_drop_inode(struct inode *inode) | ||
495 | { | ||
496 | struct gfs2_inode *ip = GFS2_I(inode); | ||
497 | |||
498 | if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { | ||
499 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; | ||
500 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
501 | clear_nlink(inode); | ||
502 | } | ||
503 | generic_drop_inode(inode); | ||
504 | } | ||
505 | |||
506 | /** | ||
507 | * gfs2_clear_inode - Deallocate an inode when VFS is done with it | ||
508 | * @inode: The VFS inode | ||
509 | * | ||
510 | */ | ||
511 | |||
512 | static void gfs2_clear_inode(struct inode *inode) | ||
513 | { | ||
514 | struct gfs2_inode *ip = GFS2_I(inode); | ||
515 | |||
516 | /* This tells us it's a "real" inode and not one which only | ||
517 | * serves to contain an address space (see rgrp.c, meta_io.c) | ||
518 | * which therefore doesn't have its own glocks. | ||
519 | */ | ||
520 | if (test_bit(GIF_USER, &ip->i_flags)) { | ||
521 | ip->i_gl->gl_object = NULL; | ||
522 | gfs2_glock_put(ip->i_gl); | ||
523 | ip->i_gl = NULL; | ||
524 | if (ip->i_iopen_gh.gh_gl) { | ||
525 | ip->i_iopen_gh.gh_gl->gl_object = NULL; | ||
526 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | ||
527 | } | ||
528 | } | ||
529 | } | ||
530 | |||
531 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) | ||
532 | { | ||
533 | do { | ||
534 | if (d1 == d2) | ||
535 | return 1; | ||
536 | d1 = d1->d_parent; | ||
537 | } while (!IS_ROOT(d1)); | ||
538 | return 0; | ||
539 | } | ||
540 | |||
541 | /** | ||
542 | * gfs2_show_options - Show mount options for /proc/mounts | ||
543 | * @s: seq_file structure | ||
544 | * @mnt: vfsmount | ||
545 | * | ||
546 | * Returns: 0 on success or error code | ||
547 | */ | ||
548 | |||
549 | static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | ||
550 | { | ||
551 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; | ||
552 | struct gfs2_args *args = &sdp->sd_args; | ||
553 | |||
554 | if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) | ||
555 | seq_printf(s, ",meta"); | ||
556 | if (args->ar_lockproto[0]) | ||
557 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | ||
558 | if (args->ar_locktable[0]) | ||
559 | seq_printf(s, ",locktable=%s", args->ar_locktable); | ||
560 | if (args->ar_hostdata[0]) | ||
561 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); | ||
562 | if (args->ar_spectator) | ||
563 | seq_printf(s, ",spectator"); | ||
564 | if (args->ar_ignore_local_fs) | ||
565 | seq_printf(s, ",ignore_local_fs"); | ||
566 | if (args->ar_localflocks) | ||
567 | seq_printf(s, ",localflocks"); | ||
568 | if (args->ar_localcaching) | ||
569 | seq_printf(s, ",localcaching"); | ||
570 | if (args->ar_debug) | ||
571 | seq_printf(s, ",debug"); | ||
572 | if (args->ar_upgrade) | ||
573 | seq_printf(s, ",upgrade"); | ||
574 | if (args->ar_posix_acl) | ||
575 | seq_printf(s, ",acl"); | ||
576 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { | ||
577 | char *state; | ||
578 | switch (args->ar_quota) { | ||
579 | case GFS2_QUOTA_OFF: | ||
580 | state = "off"; | ||
581 | break; | ||
582 | case GFS2_QUOTA_ACCOUNT: | ||
583 | state = "account"; | ||
584 | break; | ||
585 | case GFS2_QUOTA_ON: | ||
586 | state = "on"; | ||
587 | break; | ||
588 | default: | ||
589 | state = "unknown"; | ||
590 | break; | ||
591 | } | ||
592 | seq_printf(s, ",quota=%s", state); | ||
593 | } | ||
594 | if (args->ar_suiddir) | ||
595 | seq_printf(s, ",suiddir"); | ||
596 | if (args->ar_data != GFS2_DATA_DEFAULT) { | ||
597 | char *state; | ||
598 | switch (args->ar_data) { | ||
599 | case GFS2_DATA_WRITEBACK: | ||
600 | state = "writeback"; | ||
601 | break; | ||
602 | case GFS2_DATA_ORDERED: | ||
603 | state = "ordered"; | ||
604 | break; | ||
605 | default: | ||
606 | state = "unknown"; | ||
607 | break; | ||
608 | } | ||
609 | seq_printf(s, ",data=%s", state); | ||
610 | } | ||
611 | if (args->ar_discard) | ||
612 | seq_printf(s, ",discard"); | ||
613 | |||
614 | return 0; | ||
615 | } | ||
616 | |||
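For concreteness, a mount carrying acl, quota accounting and discard would show up in /proc/mounts roughly as follows (device and mount point invented):

    /dev/sdb1 /mnt/test gfs2 rw,acl,quota=account,discard 0 0

Options still at their compiled-in defaults are deliberately omitted, which keeps the line short.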
617 | /* | ||
618 | * We have to (at the moment) hold the inode's main lock to cover | ||
619 | * the gap between unlocking the shared lock on the iopen lock and | ||
620 | * taking the exclusive lock. I'd rather do a shared -> exclusive | ||
621 | * conversion on the iopen lock, but we can change that later. This | ||
622 | * is safe, just less efficient. | ||
623 | */ | ||
624 | |||
625 | static void gfs2_delete_inode(struct inode *inode) | ||
626 | { | ||
627 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | ||
628 | struct gfs2_inode *ip = GFS2_I(inode); | ||
629 | struct gfs2_holder gh; | ||
630 | int error; | ||
631 | |||
632 | if (!test_bit(GIF_USER, &ip->i_flags)) | ||
633 | goto out; | ||
634 | |||
635 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
636 | if (unlikely(error)) { | ||
637 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | ||
638 | goto out; | ||
639 | } | ||
640 | |||
641 | gfs2_glock_dq_wait(&ip->i_iopen_gh); | ||
642 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); | ||
643 | error = gfs2_glock_nq(&ip->i_iopen_gh); | ||
644 | if (error) | ||
645 | goto out_truncate; | ||
646 | |||
647 | if (S_ISDIR(inode->i_mode) && | ||
648 | (ip->i_diskflags & GFS2_DIF_EXHASH)) { | ||
649 | error = gfs2_dir_exhash_dealloc(ip); | ||
650 | if (error) | ||
651 | goto out_unlock; | ||
652 | } | ||
653 | |||
654 | if (ip->i_eattr) { | ||
655 | error = gfs2_ea_dealloc(ip); | ||
656 | if (error) | ||
657 | goto out_unlock; | ||
658 | } | ||
659 | |||
660 | if (!gfs2_is_stuffed(ip)) { | ||
661 | error = gfs2_file_dealloc(ip); | ||
662 | if (error) | ||
663 | goto out_unlock; | ||
664 | } | ||
665 | |||
666 | error = gfs2_dinode_dealloc(ip); | ||
667 | if (error) | ||
668 | goto out_unlock; | ||
669 | |||
670 | out_truncate: | ||
671 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | ||
672 | if (error) | ||
673 | goto out_unlock; | ||
674 | /* Needs to be done before glock release & also in a transaction */ | ||
675 | truncate_inode_pages(&inode->i_data, 0); | ||
676 | gfs2_trans_end(sdp); | ||
677 | |||
678 | out_unlock: | ||
679 | if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) | ||
680 | gfs2_glock_dq(&ip->i_iopen_gh); | ||
681 | gfs2_holder_uninit(&ip->i_iopen_gh); | ||
682 | gfs2_glock_dq_uninit(&gh); | ||
683 | if (error && error != GLR_TRYFAILED) | ||
684 | fs_warn(sdp, "gfs2_delete_inode: %d\n", error); | ||
685 | out: | ||
686 | truncate_inode_pages(&inode->i_data, 0); | ||
687 | clear_inode(inode); | ||
688 | } | ||
689 | |||
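The lock dance described in the comment above, written out as a sequence (illustrative):

    /* 1. inode glock:  nq EX                    -- covers the whole window
     * 2. iopen glock:  dq the shared hold (wait for it to drain)
     * 3. iopen glock:  reinit EX, LM_FLAG_TRY_1CB | GL_NOCACHE, then nq
     *    - failure means another node holds iopen and inherits the
     *      deallocation work; we just truncate our pages and bail
     * 4. on success:   dealloc exhash dir / eattrs / data / dinode
     */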
690 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | ||
691 | { | ||
692 | struct gfs2_inode *ip; | ||
693 | |||
694 | ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); | ||
695 | if (ip) { | ||
696 | ip->i_flags = 0; | ||
697 | ip->i_gl = NULL; | ||
698 | } | ||
699 | return &ip->i_inode; | ||
700 | } | ||
701 | |||
702 | static void gfs2_destroy_inode(struct inode *inode) | ||
703 | { | ||
704 | kmem_cache_free(gfs2_inode_cachep, inode); | ||
705 | } | ||
706 | |||
707 | const struct super_operations gfs2_super_ops = { | ||
708 | .alloc_inode = gfs2_alloc_inode, | ||
709 | .destroy_inode = gfs2_destroy_inode, | ||
710 | .write_inode = gfs2_write_inode, | ||
711 | .delete_inode = gfs2_delete_inode, | ||
712 | .put_super = gfs2_put_super, | ||
713 | .write_super = gfs2_write_super, | ||
714 | .sync_fs = gfs2_sync_fs, | ||
715 | .freeze_fs = gfs2_freeze, | ||
716 | .unfreeze_fs = gfs2_unfreeze, | ||
717 | .statfs = gfs2_statfs, | ||
718 | .remount_fs = gfs2_remount_fs, | ||
719 | .clear_inode = gfs2_clear_inode, | ||
720 | .drop_inode = gfs2_drop_inode, | ||
721 | .show_options = gfs2_show_options, | ||
722 | }; | ||
723 | |||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 152e6c4a0dca..2e9b9326bfc9 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -60,7 +60,6 @@ | |||
60 | #include "super.h" | 60 | #include "super.h" |
61 | #include "trans.h" | 61 | #include "trans.h" |
62 | #include "inode.h" | 62 | #include "inode.h" |
63 | #include "ops_address.h" | ||
64 | #include "util.h" | 63 | #include "util.h" |
65 | 64 | ||
66 | #define QUOTA_USER 1 | 65 | #define QUOTA_USER 1 |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 247e8f7d6b3d..59d2695509d3 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -13,8 +13,7 @@ | |||
13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
14 | #include <linux/gfs2_ondisk.h> | 14 | #include <linux/gfs2_ondisk.h> |
15 | #include <linux/crc32.h> | 15 | #include <linux/crc32.h> |
16 | #include <linux/kthread.h> | 16 | #include <linux/slow-work.h> |
17 | #include <linux/freezer.h> | ||
18 | 17 | ||
19 | #include "gfs2.h" | 18 | #include "gfs2.h" |
20 | #include "incore.h" | 19 | #include "incore.h" |
@@ -441,18 +440,25 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | |||
441 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); | 440 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); |
442 | } | 441 | } |
443 | 442 | ||
444 | /** | 443 | static int gfs2_recover_get_ref(struct slow_work *work) |
445 | * gfs2_recover_journal - recover a given journal | 444 | { |
446 | * @jd: the struct gfs2_jdesc describing the journal | 445 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); |
447 | * | 446 | if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) |
448 | * Acquire the journal's lock, check to see if the journal is clean, and | 447 | return -EBUSY; |
449 | * do recovery if necessary. | 448 | return 0; |
450 | * | 449 | } |
451 | * Returns: errno | ||
452 | */ | ||
453 | 450 | ||
454 | int gfs2_recover_journal(struct gfs2_jdesc *jd) | 451 | static void gfs2_recover_put_ref(struct slow_work *work) |
452 | { | ||
453 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); | ||
454 | clear_bit(JDF_RECOVERY, &jd->jd_flags); | ||
455 | smp_mb__after_clear_bit(); | ||
456 | wake_up_bit(&jd->jd_flags, JDF_RECOVERY); | ||
457 | } | ||
458 | |||
459 | static void gfs2_recover_work(struct slow_work *work) | ||
455 | { | 460 | { |
461 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); | ||
456 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | 462 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); |
457 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | 463 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); |
458 | struct gfs2_log_header_host head; | 464 | struct gfs2_log_header_host head; |
@@ -569,7 +575,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) | |||
569 | gfs2_glock_dq_uninit(&j_gh); | 575 | gfs2_glock_dq_uninit(&j_gh); |
570 | 576 | ||
571 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); | 577 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); |
572 | return 0; | 578 | return; |
573 | 579 | ||
574 | fail_gunlock_tr: | 580 | fail_gunlock_tr: |
575 | gfs2_glock_dq_uninit(&t_gh); | 581 | gfs2_glock_dq_uninit(&t_gh); |
@@ -584,70 +590,28 @@ fail_gunlock_j: | |||
584 | 590 | ||
585 | fail: | 591 | fail: |
586 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); | 592 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); |
587 | return error; | ||
588 | } | 593 | } |
589 | 594 | ||
590 | static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp) | 595 | struct slow_work_ops gfs2_recover_ops = { |
591 | { | 596 | .get_ref = gfs2_recover_get_ref, |
592 | struct gfs2_jdesc *jd; | 597 | .put_ref = gfs2_recover_put_ref, |
593 | int found = 0; | 598 | .execute = gfs2_recover_work, |
594 | 599 | }; | |
595 | spin_lock(&sdp->sd_jindex_spin); | ||
596 | 600 | ||
597 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | ||
598 | if (jd->jd_dirty) { | ||
599 | jd->jd_dirty = 0; | ||
600 | found = 1; | ||
601 | break; | ||
602 | } | ||
603 | } | ||
604 | spin_unlock(&sdp->sd_jindex_spin); | ||
605 | |||
606 | if (!found) | ||
607 | jd = NULL; | ||
608 | 601 | ||
609 | return jd; | 602 | static int gfs2_recovery_wait(void *word) |
610 | } | ||
611 | |||
612 | /** | ||
613 | * gfs2_check_journals - Recover any dirty journals | ||
614 | * @sdp: the filesystem | ||
615 | * | ||
616 | */ | ||
617 | |||
618 | static void gfs2_check_journals(struct gfs2_sbd *sdp) | ||
619 | { | 603 | { |
620 | struct gfs2_jdesc *jd; | 604 | schedule(); |
621 | 605 | return 0; | |
622 | for (;;) { | ||
623 | jd = gfs2_jdesc_find_dirty(sdp); | ||
624 | if (!jd) | ||
625 | break; | ||
626 | |||
627 | if (jd != sdp->sd_jdesc) | ||
628 | gfs2_recover_journal(jd); | ||
629 | } | ||
630 | } | 606 | } |
631 | 607 | ||
632 | /** | 608 | int gfs2_recover_journal(struct gfs2_jdesc *jd) |
633 | * gfs2_recoverd - Recover dead machine's journals | ||
634 | * @sdp: Pointer to GFS2 superblock | ||
635 | * | ||
636 | */ | ||
637 | |||
638 | int gfs2_recoverd(void *data) | ||
639 | { | 609 | { |
640 | struct gfs2_sbd *sdp = data; | 610 | int rv; |
641 | unsigned long t; | 611 | rv = slow_work_enqueue(&jd->jd_work); |
642 | 612 | if (rv) | |
643 | while (!kthread_should_stop()) { | 613 | return rv; |
644 | gfs2_check_journals(sdp); | 614 | wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); |
645 | t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; | ||
646 | if (freezing(current)) | ||
647 | refrigerator(); | ||
648 | schedule_timeout_interruptible(t); | ||
649 | } | ||
650 | |||
651 | return 0; | 615 | return 0; |
652 | } | 616 | } |
653 | 617 | ||
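Under the new scheme each journal descriptor presumably has its jd_work initialized once, elsewhere in the patch, before recovery can be queued; a minimal sketch of the expected wiring (assumed, not shown in this hunk):

    slow_work_init(&jd->jd_work, &gfs2_recover_ops);  /* at jdesc setup */
    ...
    error = gfs2_recover_journal(jd); /* enqueues, then blocks until
                                       * JDF_RECOVERY is cleared by
                                       * gfs2_recover_put_ref() */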
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index a8218ea15b57..1616ac22569a 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h | |||
@@ -28,7 +28,7 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); | |||
28 | extern int gfs2_find_jhead(struct gfs2_jdesc *jd, | 28 | extern int gfs2_find_jhead(struct gfs2_jdesc *jd, |
29 | struct gfs2_log_header_host *head); | 29 | struct gfs2_log_header_host *head); |
30 | extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); | 30 | extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); |
31 | extern int gfs2_recoverd(void *data); | 31 | extern struct slow_work_ops gfs2_recover_ops; |
32 | 32 | ||
33 | #endif /* __RECOVERY_DOT_H__ */ | 33 | #endif /* __RECOVERY_DOT_H__ */ |
34 | 34 | ||
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 565038243fa2..daa4ae341a29 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include "util.h" | 29 | #include "util.h" |
30 | #include "log.h" | 30 | #include "log.h" |
31 | #include "inode.h" | 31 | #include "inode.h" |
32 | #include "ops_address.h" | 32 | #include "trace_gfs2.h" |
33 | 33 | ||
34 | #define BFITNOENT ((u32)~0) | 34 | #define BFITNOENT ((u32)~0) |
35 | #define NO_BLOCK ((u64)~0) | 35 | #define NO_BLOCK ((u64)~0) |
@@ -442,6 +442,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) | |||
442 | for (x = 0; x < length; x++) { | 442 | for (x = 0; x < length; x++) { |
443 | bi = rgd->rd_bits + x; | 443 | bi = rgd->rd_bits + x; |
444 | 444 | ||
445 | bi->bi_flags = 0; | ||
445 | /* small rgrp; bitmap stored completely in header block */ | 446 | /* small rgrp; bitmap stored completely in header block */ |
446 | if (length == 1) { | 447 | if (length == 1) { |
447 | bytes = bytes_left; | 448 | bytes = bytes_left; |
@@ -580,7 +581,6 @@ static int read_rindex_entry(struct gfs2_inode *ip, | |||
580 | 581 | ||
581 | rgd->rd_gl->gl_object = rgd; | 582 | rgd->rd_gl->gl_object = rgd; |
582 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | 583 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
583 | rgd->rd_flags |= GFS2_RDF_CHECK; | ||
584 | return error; | 584 | return error; |
585 | } | 585 | } |
586 | 586 | ||
@@ -701,10 +701,9 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) | |||
701 | u32 rg_flags; | 701 | u32 rg_flags; |
702 | 702 | ||
703 | rg_flags = be32_to_cpu(str->rg_flags); | 703 | rg_flags = be32_to_cpu(str->rg_flags); |
704 | if (rg_flags & GFS2_RGF_NOALLOC) | 704 | rg_flags &= ~GFS2_RDF_MASK; |
705 | rgd->rd_flags |= GFS2_RDF_NOALLOC; | 705 | rgd->rd_flags &= GFS2_RDF_MASK; |
706 | else | 706 | rgd->rd_flags |= rg_flags; |
707 | rgd->rd_flags &= ~GFS2_RDF_NOALLOC; | ||
708 | rgd->rd_free = be32_to_cpu(str->rg_free); | 707 | rgd->rd_free = be32_to_cpu(str->rg_free); |
709 | rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); | 708 | rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); |
710 | rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); | 709 | rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); |
@@ -713,11 +712,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) | |||
713 | static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) | 712 | static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) |
714 | { | 713 | { |
715 | struct gfs2_rgrp *str = buf; | 714 | struct gfs2_rgrp *str = buf; |
716 | u32 rg_flags = 0; | ||
717 | 715 | ||
718 | if (rgd->rd_flags & GFS2_RDF_NOALLOC) | 716 | str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); |
719 | rg_flags |= GFS2_RGF_NOALLOC; | ||
720 | str->rg_flags = cpu_to_be32(rg_flags); | ||
721 | str->rg_free = cpu_to_be32(rgd->rd_free); | 717 | str->rg_free = cpu_to_be32(rgd->rd_free); |
722 | str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); | 718 | str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); |
723 | str->__pad = cpu_to_be32(0); | 719 | str->__pad = cpu_to_be32(0); |
@@ -775,8 +771,10 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
775 | } | 771 | } |
776 | 772 | ||
777 | if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { | 773 | if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { |
774 | for (x = 0; x < length; x++) | ||
775 | clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); | ||
778 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); | 776 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); |
779 | rgd->rd_flags |= GFS2_RDF_UPTODATE; | 777 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); |
780 | } | 778 | } |
781 | 779 | ||
782 | spin_lock(&sdp->sd_rindex_spin); | 780 | spin_lock(&sdp->sd_rindex_spin); |
@@ -845,7 +843,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
845 | struct super_block *sb = sdp->sd_vfs; | 843 | struct super_block *sb = sdp->sd_vfs; |
846 | struct block_device *bdev = sb->s_bdev; | 844 | struct block_device *bdev = sb->s_bdev; |
847 | const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / | 845 | const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / |
848 | bdev_hardsect_size(sb->s_bdev); | 846 | bdev_logical_block_size(sb->s_bdev); |
849 | u64 blk; | 847 | u64 blk; |
850 | sector_t start = 0; | 848 | sector_t start = 0; |
851 | sector_t nr_sects = 0; | 849 | sector_t nr_sects = 0; |
@@ -903,6 +901,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) | |||
903 | continue; | 901 | continue; |
904 | if (sdp->sd_args.ar_discard) | 902 | if (sdp->sd_args.ar_discard) |
905 | gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi); | 903 | gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi); |
904 | clear_bit(GBF_FULL, &bi->bi_flags); | ||
906 | memcpy(bi->bi_clone + bi->bi_offset, | 905 | memcpy(bi->bi_clone + bi->bi_offset, |
907 | bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); | 906 | bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); |
908 | } | 907 | } |
@@ -942,7 +941,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) | |||
942 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 941 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
943 | int ret = 0; | 942 | int ret = 0; |
944 | 943 | ||
945 | if (rgd->rd_flags & GFS2_RDF_NOALLOC) | 944 | if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) |
946 | return 0; | 945 | return 0; |
947 | 946 | ||
948 | spin_lock(&sdp->sd_rindex_spin); | 947 | spin_lock(&sdp->sd_rindex_spin); |
@@ -1315,30 +1314,37 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
1315 | { | 1314 | { |
1316 | struct gfs2_bitmap *bi = NULL; | 1315 | struct gfs2_bitmap *bi = NULL; |
1317 | const u32 length = rgd->rd_length; | 1316 | const u32 length = rgd->rd_length; |
1318 | u32 blk = 0; | 1317 | u32 blk = BFITNOENT; |
1319 | unsigned int buf, x; | 1318 | unsigned int buf, x; |
1320 | const unsigned int elen = *n; | 1319 | const unsigned int elen = *n; |
1321 | const u8 *buffer; | 1320 | const u8 *buffer = NULL; |
1322 | 1321 | ||
1323 | *n = 0; | 1322 | *n = 0; |
1324 | /* Find bitmap block that contains bits for goal block */ | 1323 | /* Find bitmap block that contains bits for goal block */ |
1325 | for (buf = 0; buf < length; buf++) { | 1324 | for (buf = 0; buf < length; buf++) { |
1326 | bi = rgd->rd_bits + buf; | 1325 | bi = rgd->rd_bits + buf; |
1327 | if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) | 1326 | /* Convert scope of "goal" from rgrp-wide to within found bit block */ |
1328 | break; | 1327 | if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { |
1328 | goal -= bi->bi_start * GFS2_NBBY; | ||
1329 | goto do_search; | ||
1330 | } | ||
1329 | } | 1331 | } |
1332 | buf = 0; | ||
1333 | goal = 0; | ||
1330 | 1334 | ||
1331 | gfs2_assert(rgd->rd_sbd, buf < length); | 1335 | do_search: |
1332 | |||
1333 | /* Convert scope of "goal" from rgrp-wide to within found bit block */ | ||
1334 | goal -= bi->bi_start * GFS2_NBBY; | ||
1335 | |||
1336 | /* Search (up to entire) bitmap in this rgrp for allocatable block. | 1336 | /* Search (up to entire) bitmap in this rgrp for allocatable block. |
1337 | "x <= length", instead of "x < length", because we typically start | 1337 | "x <= length", instead of "x < length", because we typically start |
1338 | the search in the middle of a bit block, but if we can't find an | 1338 | the search in the middle of a bit block, but if we can't find an |
1339 | allocatable block anywhere else, we want to be able wrap around and | 1339 | allocatable block anywhere else, we want to be able wrap around and |
1340 | search in the first part of our first-searched bit block. */ | 1340 | search in the first part of our first-searched bit block. */ |
1341 | for (x = 0; x <= length; x++) { | 1341 | for (x = 0; x <= length; x++) { |
1342 | bi = rgd->rd_bits + buf; | ||
1343 | |||
1344 | if (test_bit(GBF_FULL, &bi->bi_flags) && | ||
1345 | (old_state == GFS2_BLKST_FREE)) | ||
1346 | goto skip; | ||
1347 | |||
1342 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone | 1348 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone |
1343 | bitmaps, so we must search the originals for that. */ | 1349 | bitmaps, so we must search the originals for that. */ |
1344 | buffer = bi->bi_bh->b_data + bi->bi_offset; | 1350 | buffer = bi->bi_bh->b_data + bi->bi_offset; |
@@ -1349,33 +1355,39 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
1349 | if (blk != BFITNOENT) | 1355 | if (blk != BFITNOENT) |
1350 | break; | 1356 | break; |
1351 | 1357 | ||
1358 | if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) | ||
1359 | set_bit(GBF_FULL, &bi->bi_flags); | ||
1360 | |||
1352 | /* Try next bitmap block (wrap back to rgrp header if at end) */ | 1361 | /* Try next bitmap block (wrap back to rgrp header if at end) */ |
1353 | buf = (buf + 1) % length; | 1362 | skip: |
1354 | bi = rgd->rd_bits + buf; | 1363 | buf++; |
1364 | buf %= length; | ||
1355 | goal = 0; | 1365 | goal = 0; |
1356 | } | 1366 | } |
1357 | 1367 | ||
1358 | if (blk != BFITNOENT && old_state != new_state) { | 1368 | if (blk == BFITNOENT) |
1359 | *n = 1; | 1369 | return blk; |
1360 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | 1370 | *n = 1; |
1371 | if (old_state == new_state) | ||
1372 | goto out; | ||
1373 | |||
1374 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | ||
1375 | gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, | ||
1376 | bi->bi_len, blk, new_state); | ||
1377 | goal = blk; | ||
1378 | while (*n < elen) { | ||
1379 | goal++; | ||
1380 | if (goal >= (bi->bi_len * GFS2_NBBY)) | ||
1381 | break; | ||
1382 | if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != | ||
1383 | GFS2_BLKST_FREE) | ||
1384 | break; | ||
1361 | gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, | 1385 | gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, |
1362 | bi->bi_len, blk, new_state); | 1386 | bi->bi_len, goal, new_state); |
1363 | goal = blk; | 1387 | (*n)++; |
1364 | while (*n < elen) { | ||
1365 | goal++; | ||
1366 | if (goal >= (bi->bi_len * GFS2_NBBY)) | ||
1367 | break; | ||
1368 | if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != | ||
1369 | GFS2_BLKST_FREE) | ||
1370 | break; | ||
1371 | gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, | ||
1372 | bi->bi_offset, bi->bi_len, goal, | ||
1373 | new_state); | ||
1374 | (*n)++; | ||
1375 | } | ||
1376 | } | 1388 | } |
1377 | 1389 | out: | |
1378 | return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; | 1390 | return (bi->bi_start * GFS2_NBBY) + blk; |
1379 | } | 1391 | } |
1380 | 1392 | ||
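The GBF_FULL handling above amounts to a small negative cache; sketched out (illustrative):

    /* A bitmap block that was scanned end-to-end (goal == 0) for FREE
     * blocks and yielded nothing gets flagged:
     *         set_bit(GBF_FULL, &bi->bi_flags);
     * Later FREE searches skip flagged bitmaps outright:
     *         if (test_bit(GBF_FULL, &bi->bi_flags) &&
     *             old_state == GFS2_BLKST_FREE)
     *                 goto skip;
     * The flag is cleared when the rgrp is re-read or its clone bitmaps
     * are repolished, so a freed block makes the bitmap searchable again.
     */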
1381 | /** | 1393 | /** |
@@ -1435,13 +1447,33 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
1435 | } | 1447 | } |
1436 | 1448 | ||
1437 | /** | 1449 | /** |
1438 | * gfs2_alloc_block - Allocate a block | 1450 | * gfs2_rgrp_dump - print out an rgrp |
1451 | * @seq: The iterator | ||
1452 | * @gl: The glock in question | ||
1453 | * | ||
1454 | */ | ||
1455 | |||
1456 | int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) | ||
1457 | { | ||
1458 | const struct gfs2_rgrpd *rgd = gl->gl_object; | ||
1459 | if (rgd == NULL) | ||
1460 | return 0; | ||
1461 | gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", | ||
1462 | (unsigned long long)rgd->rd_addr, rgd->rd_flags, | ||
1463 | rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); | ||
1464 | return 0; | ||
1465 | } | ||
1466 | |||
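An example of the line this contributes to a glock dump, with made-up values:

    R: n:123456 f:02 b:4987/4980 i:23

i.e. the rgrp address, its flags, free/free_clone block counts and the dinode count.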
1467 | /** | ||
1468 | * gfs2_alloc_block - Allocate one or more blocks | ||
1439 | * @ip: the inode to allocate the block for | 1469 | * @ip: the inode to allocate the block for |
1470 | * @bn: Used to return the starting block number | ||
1471 | * @n: requested number of blocks/extent length (value/result) | ||
1440 | * | 1472 | * |
1441 | * Returns: the allocated block | 1473 | * Returns: 0 or error |
1442 | */ | 1474 | */ |
1443 | 1475 | ||
1444 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) | 1476 | int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) |
1445 | { | 1477 | { |
1446 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1478 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1447 | struct buffer_head *dibh; | 1479 | struct buffer_head *dibh; |
@@ -1457,7 +1489,10 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) | |||
1457 | goal = rgd->rd_last_alloc; | 1489 | goal = rgd->rd_last_alloc; |
1458 | 1490 | ||
1459 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); | 1491 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); |
1460 | BUG_ON(blk == BFITNOENT); | 1492 | |
1493 | /* Since all blocks are reserved in advance, this shouldn't happen */ | ||
1494 | if (blk == BFITNOENT) | ||
1495 | goto rgrp_error; | ||
1461 | 1496 | ||
1462 | rgd->rd_last_alloc = blk; | 1497 | rgd->rd_last_alloc = blk; |
1463 | block = rgd->rd_data0 + blk; | 1498 | block = rgd->rd_data0 + blk; |
@@ -1469,7 +1504,9 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) | |||
1469 | di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); | 1504 | di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); |
1470 | brelse(dibh); | 1505 | brelse(dibh); |
1471 | } | 1506 | } |
1472 | gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); | 1507 | if (rgd->rd_free < *n) |
1508 | goto rgrp_error; | ||
1509 | |||
1473 | rgd->rd_free -= *n; | 1510 | rgd->rd_free -= *n; |
1474 | 1511 | ||
1475 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1512 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
@@ -1483,8 +1520,17 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) | |||
1483 | spin_lock(&sdp->sd_rindex_spin); | 1520 | spin_lock(&sdp->sd_rindex_spin); |
1484 | rgd->rd_free_clone -= *n; | 1521 | rgd->rd_free_clone -= *n; |
1485 | spin_unlock(&sdp->sd_rindex_spin); | 1522 | spin_unlock(&sdp->sd_rindex_spin); |
1523 | trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED); | ||
1524 | *bn = block; | ||
1525 | return 0; | ||
1486 | 1526 | ||
1487 | return block; | 1527 | rgrp_error: |
1528 | fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", | ||
1529 | (unsigned long long)rgd->rd_addr); | ||
1530 | fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); | ||
1531 | gfs2_rgrp_dump(NULL, rgd->rd_gl); | ||
1532 | rgd->rd_flags |= GFS2_RDF_ERROR; | ||
1533 | return -EIO; | ||
1488 | } | 1534 | } |
1489 | 1535 | ||
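A hedged caller sketch for the new value/result interface (error handling condensed):

    u64 bn;
    unsigned int n = 4;     /* request an extent of up to 4 blocks */
    int error;

    error = gfs2_alloc_block(ip, &bn, &n);
    if (error)              /* -EIO: the rgrp now carries GFS2_RDF_ERROR */
            return error;
    /* success: bn is the first block, n the extent length obtained */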
1490 | /** | 1536 | /** |
@@ -1526,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | |||
1526 | spin_lock(&sdp->sd_rindex_spin); | 1572 | spin_lock(&sdp->sd_rindex_spin); |
1527 | rgd->rd_free_clone--; | 1573 | rgd->rd_free_clone--; |
1528 | spin_unlock(&sdp->sd_rindex_spin); | 1574 | spin_unlock(&sdp->sd_rindex_spin); |
1529 | 1575 | trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); | |
1530 | return block; | 1576 | return block; |
1531 | } | 1577 | } |
1532 | 1578 | ||
@@ -1546,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
1546 | rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); | 1592 | rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); |
1547 | if (!rgd) | 1593 | if (!rgd) |
1548 | return; | 1594 | return; |
1549 | 1595 | trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); | |
1550 | rgd->rd_free += blen; | 1596 | rgd->rd_free += blen; |
1551 | 1597 | ||
1552 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1598 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
@@ -1574,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
1574 | rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); | 1620 | rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); |
1575 | if (!rgd) | 1621 | if (!rgd) |
1576 | return; | 1622 | return; |
1577 | 1623 | trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); | |
1578 | rgd->rd_free += blen; | 1624 | rgd->rd_free += blen; |
1579 | 1625 | ||
1580 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1626 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
@@ -1597,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode) | |||
1597 | rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); | 1643 | rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); |
1598 | if (!rgd) | 1644 | if (!rgd) |
1599 | return; | 1645 | return; |
1646 | trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); | ||
1600 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1647 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1601 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 1648 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1602 | gfs2_trans_add_rg(rgd); | 1649 | gfs2_trans_add_rg(rgd); |
@@ -1628,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) | |||
1628 | void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) | 1675 | void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) |
1629 | { | 1676 | { |
1630 | gfs2_free_uninit_di(rgd, ip->i_no_addr); | 1677 | gfs2_free_uninit_di(rgd, ip->i_no_addr); |
1678 | trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE); | ||
1631 | gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); | 1679 | gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); |
1632 | gfs2_meta_wipe(ip, ip->i_no_addr, 1); | 1680 | gfs2_meta_wipe(ip, ip->i_no_addr, 1); |
1633 | } | 1681 | } |
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3181c7e624bf..1e76ff0f3e00 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -14,22 +14,22 @@ struct gfs2_rgrpd; | |||
14 | struct gfs2_sbd; | 14 | struct gfs2_sbd; |
15 | struct gfs2_holder; | 15 | struct gfs2_holder; |
16 | 16 | ||
17 | void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); | 17 | extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); |
18 | 18 | ||
19 | struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); | 19 | struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); |
20 | struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); | 20 | struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); |
21 | struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); | 21 | struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); |
22 | 22 | ||
23 | void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); | 23 | extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); |
24 | int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); | 24 | extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); |
25 | 25 | ||
26 | int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); | 26 | extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); |
27 | void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); | 27 | extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); |
28 | void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); | 28 | extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); |
29 | 29 | ||
30 | void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); | 30 | extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); |
31 | 31 | ||
32 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); | 32 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); |
33 | static inline void gfs2_alloc_put(struct gfs2_inode *ip) | 33 | static inline void gfs2_alloc_put(struct gfs2_inode *ip) |
34 | { | 34 | { |
35 | BUG_ON(ip->i_alloc == NULL); | 35 | BUG_ON(ip->i_alloc == NULL); |
@@ -37,22 +37,22 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) | |||
37 | ip->i_alloc = NULL; | 37 | ip->i_alloc = NULL; |
38 | } | 38 | } |
39 | 39 | ||
40 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, | 40 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, |
41 | char *file, unsigned int line); | 41 | unsigned int line); |
42 | #define gfs2_inplace_reserve(ip) \ | 42 | #define gfs2_inplace_reserve(ip) \ |
43 | gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) | 43 | gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) |
44 | 44 | ||
45 | void gfs2_inplace_release(struct gfs2_inode *ip); | 45 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
46 | 46 | ||
47 | unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); | 47 | extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); |
48 | 48 | ||
49 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n); | 49 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); |
50 | u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); | 50 | extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); |
51 | 51 | ||
52 | void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); | 52 | extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); |
53 | void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); | 53 | extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); |
54 | void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); | 54 | extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); |
55 | void gfs2_unlink_di(struct inode *inode); | 55 | extern void gfs2_unlink_di(struct inode *inode); |
56 | 56 | ||
57 | struct gfs2_rgrp_list { | 57 | struct gfs2_rgrp_list { |
58 | unsigned int rl_rgrps; | 58 | unsigned int rl_rgrps; |
@@ -61,10 +61,11 @@ struct gfs2_rgrp_list { | |||
61 | struct gfs2_holder *rl_ghs; | 61 | struct gfs2_holder *rl_ghs; |
62 | }; | 62 | }; |
63 | 63 | ||
64 | void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, | 64 | extern void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, |
65 | u64 block); | 65 | u64 block); |
66 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); | 66 | extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); |
67 | void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); | 67 | extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); |
68 | u64 gfs2_ri_total(struct gfs2_sbd *sdp); | 68 | extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); |
69 | extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); | ||
69 | 70 | ||
70 | #endif /* __RGRP_DOT_H__ */ | 71 | #endif /* __RGRP_DOT_H__ */ |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 601913e0a482..0a6801336470 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -7,14 +7,20 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/bio.h> | ||
10 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
11 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
12 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
13 | #include <linux/completion.h> | 14 | #include <linux/completion.h> |
14 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
15 | #include <linux/crc32.h> | 16 | #include <linux/statfs.h> |
17 | #include <linux/seq_file.h> | ||
18 | #include <linux/mount.h> | ||
19 | #include <linux/kthread.h> | ||
20 | #include <linux/delay.h> | ||
16 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
17 | #include <linux/bio.h> | 22 | #include <linux/crc32.h> |
23 | #include <linux/time.h> | ||
18 | 24 | ||
19 | #include "gfs2.h" | 25 | #include "gfs2.h" |
20 | #include "incore.h" | 26 | #include "incore.h" |
@@ -31,6 +37,183 @@ | |||
31 | #include "super.h" | 37 | #include "super.h" |
32 | #include "trans.h" | 38 | #include "trans.h" |
33 | #include "util.h" | 39 | #include "util.h" |
40 | #include "sys.h" | ||
41 | #include "eattr.h" | ||
42 | |||
43 | #define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) | ||
44 | |||
45 | enum { | ||
46 | Opt_lockproto, | ||
47 | Opt_locktable, | ||
48 | Opt_hostdata, | ||
49 | Opt_spectator, | ||
50 | Opt_ignore_local_fs, | ||
51 | Opt_localflocks, | ||
52 | Opt_localcaching, | ||
53 | Opt_debug, | ||
54 | Opt_nodebug, | ||
55 | Opt_upgrade, | ||
56 | Opt_acl, | ||
57 | Opt_noacl, | ||
58 | Opt_quota_off, | ||
59 | Opt_quota_account, | ||
60 | Opt_quota_on, | ||
61 | Opt_quota, | ||
62 | Opt_noquota, | ||
63 | Opt_suiddir, | ||
64 | Opt_nosuiddir, | ||
65 | Opt_data_writeback, | ||
66 | Opt_data_ordered, | ||
67 | Opt_meta, | ||
68 | Opt_discard, | ||
69 | Opt_nodiscard, | ||
70 | Opt_commit, | ||
71 | Opt_error, | ||
72 | }; | ||
73 | |||
74 | static const match_table_t tokens = { | ||
75 | {Opt_lockproto, "lockproto=%s"}, | ||
76 | {Opt_locktable, "locktable=%s"}, | ||
77 | {Opt_hostdata, "hostdata=%s"}, | ||
78 | {Opt_spectator, "spectator"}, | ||
79 | {Opt_ignore_local_fs, "ignore_local_fs"}, | ||
80 | {Opt_localflocks, "localflocks"}, | ||
81 | {Opt_localcaching, "localcaching"}, | ||
82 | {Opt_debug, "debug"}, | ||
83 | {Opt_nodebug, "nodebug"}, | ||
84 | {Opt_upgrade, "upgrade"}, | ||
85 | {Opt_acl, "acl"}, | ||
86 | {Opt_noacl, "noacl"}, | ||
87 | {Opt_quota_off, "quota=off"}, | ||
88 | {Opt_quota_account, "quota=account"}, | ||
89 | {Opt_quota_on, "quota=on"}, | ||
90 | {Opt_quota, "quota"}, | ||
91 | {Opt_noquota, "noquota"}, | ||
92 | {Opt_suiddir, "suiddir"}, | ||
93 | {Opt_nosuiddir, "nosuiddir"}, | ||
94 | {Opt_data_writeback, "data=writeback"}, | ||
95 | {Opt_data_ordered, "data=ordered"}, | ||
96 | {Opt_meta, "meta"}, | ||
97 | {Opt_discard, "discard"}, | ||
98 | {Opt_nodiscard, "nodiscard"}, | ||
99 | {Opt_commit, "commit=%d"}, | ||
100 | {Opt_error, NULL} | ||
101 | }; | ||
102 | |||
103 | /** | ||
104 | * gfs2_mount_args - Parse mount options | ||
105 | * @sdp: the filesystem | ||
106 | * @args: the args structure, filled in from the @options string | ||
107 | * | ||
108 | * Returns: errno | ||
109 | */ | ||
110 | |||
111 | int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) | ||
112 | { | ||
113 | char *o; | ||
114 | int token; | ||
115 | substring_t tmp[MAX_OPT_ARGS]; | ||
116 | int rv; | ||
117 | |||
118 | /* Split the options into tokens with the "," character and | ||
119 | process them */ | ||
120 | |||
121 | while (1) { | ||
122 | o = strsep(&options, ","); | ||
123 | if (o == NULL) | ||
124 | break; | ||
125 | if (*o == '\0') | ||
126 | continue; | ||
127 | |||
128 | token = match_token(o, tokens, tmp); | ||
129 | switch (token) { | ||
130 | case Opt_lockproto: | ||
131 | match_strlcpy(args->ar_lockproto, &tmp[0], | ||
132 | GFS2_LOCKNAME_LEN); | ||
133 | break; | ||
134 | case Opt_locktable: | ||
135 | match_strlcpy(args->ar_locktable, &tmp[0], | ||
136 | GFS2_LOCKNAME_LEN); | ||
137 | break; | ||
138 | case Opt_hostdata: | ||
139 | match_strlcpy(args->ar_hostdata, &tmp[0], | ||
140 | GFS2_LOCKNAME_LEN); | ||
141 | break; | ||
142 | case Opt_spectator: | ||
143 | args->ar_spectator = 1; | ||
144 | break; | ||
145 | case Opt_ignore_local_fs: | ||
146 | args->ar_ignore_local_fs = 1; | ||
147 | break; | ||
148 | case Opt_localflocks: | ||
149 | args->ar_localflocks = 1; | ||
150 | break; | ||
151 | case Opt_localcaching: | ||
152 | args->ar_localcaching = 1; | ||
153 | break; | ||
154 | case Opt_debug: | ||
155 | args->ar_debug = 1; | ||
156 | break; | ||
157 | case Opt_nodebug: | ||
158 | args->ar_debug = 0; | ||
159 | break; | ||
160 | case Opt_upgrade: | ||
161 | args->ar_upgrade = 1; | ||
162 | break; | ||
163 | case Opt_acl: | ||
164 | args->ar_posix_acl = 1; | ||
165 | break; | ||
166 | case Opt_noacl: | ||
167 | args->ar_posix_acl = 0; | ||
168 | break; | ||
169 | case Opt_quota_off: | ||
170 | case Opt_noquota: | ||
171 | args->ar_quota = GFS2_QUOTA_OFF; | ||
172 | break; | ||
173 | case Opt_quota_account: | ||
174 | args->ar_quota = GFS2_QUOTA_ACCOUNT; | ||
175 | break; | ||
176 | case Opt_quota_on: | ||
177 | case Opt_quota: | ||
178 | args->ar_quota = GFS2_QUOTA_ON; | ||
179 | break; | ||
180 | case Opt_suiddir: | ||
181 | args->ar_suiddir = 1; | ||
182 | break; | ||
183 | case Opt_nosuiddir: | ||
184 | args->ar_suiddir = 0; | ||
185 | break; | ||
186 | case Opt_data_writeback: | ||
187 | args->ar_data = GFS2_DATA_WRITEBACK; | ||
188 | break; | ||
189 | case Opt_data_ordered: | ||
190 | args->ar_data = GFS2_DATA_ORDERED; | ||
191 | break; | ||
192 | case Opt_meta: | ||
193 | args->ar_meta = 1; | ||
194 | break; | ||
195 | case Opt_discard: | ||
196 | args->ar_discard = 1; | ||
197 | break; | ||
198 | case Opt_nodiscard: | ||
199 | args->ar_discard = 0; | ||
200 | break; | ||
201 | case Opt_commit: | ||
202 | rv = match_int(&tmp[0], &args->ar_commit); | ||
203 | if (rv || args->ar_commit <= 0) { | ||
204 | fs_info(sdp, "commit mount option requires a positive numeric argument\n"); | ||
205 | return rv ? rv : -EINVAL; | ||
206 | } | ||
207 | break; | ||
208 | case Opt_error: | ||
209 | default: | ||
210 | fs_info(sdp, "invalid mount option: %s\n", o); | ||
211 | return -EINVAL; | ||
212 | } | ||
213 | } | ||
214 | |||
215 | return 0; | ||
216 | } | ||
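The parser above is the usual strsep()/match_token() walk: split the option string on commas, skip empty tokens, and dispatch on the matched token. A minimal userspace sketch of the same shape follows; the toy option set, struct, and strcmp/sscanf dispatch are illustrative stand-ins for the kernel's match_table_t machinery, not its API.

#define _GNU_SOURCE             /* for strsep() on glibc */
#include <stdio.h>
#include <string.h>

struct opts { int debug; int commit; };

/* Walk a comma-separated option string, as gfs2_mount_args() does. */
static int parse_opts(char *options, struct opts *o)
{
    char *tok;

    while ((tok = strsep(&options, ",")) != NULL) {
        if (*tok == '\0')
            continue;                   /* skip empty tokens */
        if (strcmp(tok, "debug") == 0)
            o->debug = 1;
        else if (strcmp(tok, "nodebug") == 0)
            o->debug = 0;
        else if (sscanf(tok, "commit=%d", &o->commit) == 1) {
            if (o->commit <= 0)         /* mirrors the positive-value check */
                return -1;
        } else
            return -1;                  /* unknown option */
    }
    return 0;
}

int main(void)
{
    char buf[] = "debug,commit=30";
    struct opts o = { 0, 60 };

    if (parse_opts(buf, &o) == 0)
        printf("debug=%d commit=%d\n", o.debug, o.commit);
    return 0;
}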
34 | 217 | ||
35 | /** | 218 | /** |
36 | * gfs2_jindex_free - Clear all the journal index information | 219 | * gfs2_jindex_free - Clear all the journal index information |
@@ -436,3 +619,706 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) | |||
436 | mutex_unlock(&sdp->sd_freeze_lock); | 619 | mutex_unlock(&sdp->sd_freeze_lock); |
437 | } | 620 | } |
438 | 621 | ||
622 | |||
623 | /** | ||
624 | * gfs2_write_inode - Make sure the inode is stable on the disk | ||
625 | * @inode: The inode | ||
626 | * @sync: synchronous write flag | ||
627 | * | ||
628 | * Returns: errno | ||
629 | */ | ||
630 | |||
631 | static int gfs2_write_inode(struct inode *inode, int sync) | ||
632 | { | ||
633 | struct gfs2_inode *ip = GFS2_I(inode); | ||
634 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
635 | struct gfs2_holder gh; | ||
636 | struct buffer_head *bh; | ||
637 | struct timespec atime; | ||
638 | struct gfs2_dinode *di; | ||
639 | int ret = 0; | ||
640 | |||
641 | /* Check this is a "normal" inode, etc */ | ||
642 | if (!test_bit(GIF_USER, &ip->i_flags) || | ||
643 | (current->flags & PF_MEMALLOC)) | ||
644 | return 0; | ||
645 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
646 | if (ret) | ||
647 | goto do_flush; | ||
648 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
649 | if (ret) | ||
650 | goto do_unlock; | ||
651 | ret = gfs2_meta_inode_buffer(ip, &bh); | ||
652 | if (ret == 0) { | ||
653 | di = (struct gfs2_dinode *)bh->b_data; | ||
654 | atime.tv_sec = be64_to_cpu(di->di_atime); | ||
655 | atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); | ||
656 | if (timespec_compare(&inode->i_atime, &atime) > 0) { | ||
657 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
658 | gfs2_dinode_out(ip, bh->b_data); | ||
659 | } | ||
660 | brelse(bh); | ||
661 | } | ||
662 | gfs2_trans_end(sdp); | ||
663 | do_unlock: | ||
664 | gfs2_glock_dq_uninit(&gh); | ||
665 | do_flush: | ||
666 | if (sync != 0) | ||
667 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
668 | return ret; | ||
669 | } | ||
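gfs2_write_inode() only dirties the dinode when the in-core atime is strictly newer than the on-disk copy, so a plain read never forces a metadata write-back. A hedged userspace sketch of that guard; timespec_newer() is a stand-in for the kernel's timespec_compare() returning > 0.

#include <stdio.h>
#include <time.h>

/* Stand-in for timespec_compare(a, b) > 0: is a strictly newer than b? */
static int timespec_newer(const struct timespec *a, const struct timespec *b)
{
    if (a->tv_sec != b->tv_sec)
        return a->tv_sec > b->tv_sec;
    return a->tv_nsec > b->tv_nsec;
}

int main(void)
{
    struct timespec incore = { 1000, 500 };   /* inode->i_atime */
    struct timespec ondisk = { 1000, 100 };   /* di_atime from the buffer */

    if (timespec_newer(&incore, &ondisk))
        printf("atime advanced: write the dinode back\n");
    return 0;
}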
670 | |||
671 | /** | ||
672 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
673 | * @sdp: the filesystem | ||
674 | * | ||
675 | * Returns: errno | ||
676 | */ | ||
677 | |||
678 | static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
679 | { | ||
680 | struct gfs2_holder t_gh; | ||
681 | int error; | ||
682 | |||
683 | gfs2_quota_sync(sdp); | ||
684 | gfs2_statfs_sync(sdp); | ||
685 | |||
686 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
687 | &t_gh); | ||
688 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
689 | return error; | ||
690 | |||
691 | gfs2_meta_syncfs(sdp); | ||
692 | gfs2_log_shutdown(sdp); | ||
693 | |||
694 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
695 | |||
696 | if (t_gh.gh_gl) | ||
697 | gfs2_glock_dq_uninit(&t_gh); | ||
698 | |||
699 | gfs2_quota_cleanup(sdp); | ||
700 | |||
701 | return error; | ||
702 | } | ||
703 | |||
704 | static int gfs2_umount_recovery_wait(void *word) | ||
705 | { | ||
706 | schedule(); | ||
707 | return 0; | ||
708 | } | ||
709 | |||
710 | /** | ||
711 | * gfs2_put_super - Unmount the filesystem | ||
712 | * @sb: The VFS superblock | ||
713 | * | ||
714 | */ | ||
715 | |||
716 | static void gfs2_put_super(struct super_block *sb) | ||
717 | { | ||
718 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
719 | int error; | ||
720 | struct gfs2_jdesc *jd; | ||
721 | |||
722 | /* Unfreeze the filesystem, if we need to */ | ||
723 | |||
724 | mutex_lock(&sdp->sd_freeze_lock); | ||
725 | if (sdp->sd_freeze_count) | ||
726 | gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); | ||
727 | mutex_unlock(&sdp->sd_freeze_lock); | ||
728 | |||
729 | /* No more recovery requests */ | ||
730 | set_bit(SDF_NORECOVERY, &sdp->sd_flags); | ||
731 | smp_mb(); | ||
732 | |||
733 | /* Wait on outstanding recovery */ | ||
734 | restart: | ||
735 | spin_lock(&sdp->sd_jindex_spin); | ||
736 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | ||
737 | if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) | ||
738 | continue; | ||
739 | spin_unlock(&sdp->sd_jindex_spin); | ||
740 | wait_on_bit(&jd->jd_flags, JDF_RECOVERY, | ||
741 | gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); | ||
742 | goto restart; | ||
743 | } | ||
744 | spin_unlock(&sdp->sd_jindex_spin); | ||
745 | |||
746 | kthread_stop(sdp->sd_quotad_process); | ||
747 | kthread_stop(sdp->sd_logd_process); | ||
748 | |||
749 | if (!(sb->s_flags & MS_RDONLY)) { | ||
750 | error = gfs2_make_fs_ro(sdp); | ||
751 | if (error) | ||
752 | gfs2_io_error(sdp); | ||
753 | } | ||
754 | /* At this point, we're through modifying the disk */ | ||
755 | |||
756 | /* Release stuff */ | ||
757 | |||
758 | iput(sdp->sd_jindex); | ||
759 | iput(sdp->sd_inum_inode); | ||
760 | iput(sdp->sd_statfs_inode); | ||
761 | iput(sdp->sd_rindex); | ||
762 | iput(sdp->sd_quota_inode); | ||
763 | |||
764 | gfs2_glock_put(sdp->sd_rename_gl); | ||
765 | gfs2_glock_put(sdp->sd_trans_gl); | ||
766 | |||
767 | if (!sdp->sd_args.ar_spectator) { | ||
768 | gfs2_glock_dq_uninit(&sdp->sd_journal_gh); | ||
769 | gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); | ||
770 | gfs2_glock_dq_uninit(&sdp->sd_ir_gh); | ||
771 | gfs2_glock_dq_uninit(&sdp->sd_sc_gh); | ||
772 | gfs2_glock_dq_uninit(&sdp->sd_qc_gh); | ||
773 | iput(sdp->sd_ir_inode); | ||
774 | iput(sdp->sd_sc_inode); | ||
775 | iput(sdp->sd_qc_inode); | ||
776 | } | ||
777 | |||
778 | gfs2_glock_dq_uninit(&sdp->sd_live_gh); | ||
779 | gfs2_clear_rgrpd(sdp); | ||
780 | gfs2_jindex_free(sdp); | ||
781 | /* Take apart glock structures and buffer lists */ | ||
782 | gfs2_gl_hash_clear(sdp); | ||
783 | /* Unmount the locking protocol */ | ||
784 | gfs2_lm_unmount(sdp); | ||
785 | |||
786 | /* At this point, we're through participating in the lockspace */ | ||
787 | gfs2_sys_fs_del(sdp); | ||
788 | } | ||
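The restart: loop above is the classic drop-lock-and-rescan idiom: wait_on_bit() may sleep, which is illegal under a spinlock, so the lock is released for the wait and the whole list scan restarts, since the list may have changed in the meantime. A small pthread sketch of the shape, with illustrative names and an array standing in for the journal list:

#include <pthread.h>
#include <stdio.h>

#define N 4

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int recovering[N] = { 1, 0, 1, 0 };

static void wait_for(int i)     /* stand-in for wait_on_bit(); may sleep */
{
    recovering[i] = 0;
}

int main(void)
{
restart:
    pthread_mutex_lock(&list_lock);
    for (int i = 0; i < N; i++) {
        if (!recovering[i])
            continue;
        pthread_mutex_unlock(&list_lock);  /* cannot sleep holding the lock */
        wait_for(i);
        goto restart;                      /* the list may have changed */
    }
    pthread_mutex_unlock(&list_lock);
    printf("no recovery outstanding\n");
    return 0;
}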
789 | |||
790 | /** | ||
791 | * gfs2_sync_fs - sync the filesystem | ||
792 | * @sb: the superblock | ||
793 | * | ||
794 | * Flushes the log to disk. | ||
795 | */ | ||
796 | |||
797 | static int gfs2_sync_fs(struct super_block *sb, int wait) | ||
798 | { | ||
799 | if (wait && sb->s_fs_info) | ||
800 | gfs2_log_flush(sb->s_fs_info, NULL); | ||
801 | return 0; | ||
802 | } | ||
803 | |||
804 | /** | ||
805 | * gfs2_freeze - prevent further writes to the filesystem | ||
806 | * @sb: the VFS structure for the filesystem | ||
807 | * | ||
808 | */ | ||
809 | |||
810 | static int gfs2_freeze(struct super_block *sb) | ||
811 | { | ||
812 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
813 | int error; | ||
814 | |||
815 | if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
816 | return -EINVAL; | ||
817 | |||
818 | for (;;) { | ||
819 | error = gfs2_freeze_fs(sdp); | ||
820 | if (!error) | ||
821 | break; | ||
822 | |||
823 | switch (error) { | ||
824 | case -EBUSY: | ||
825 | fs_err(sdp, "waiting for recovery before freeze\n"); | ||
826 | break; | ||
827 | |||
828 | default: | ||
829 | fs_err(sdp, "error freezing FS: %d\n", error); | ||
830 | break; | ||
831 | } | ||
832 | |||
833 | fs_err(sdp, "retrying...\n"); | ||
834 | msleep(1000); | ||
835 | } | ||
836 | return 0; | ||
837 | } | ||
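gfs2_freeze() retries indefinitely with a one-second backoff while recovery holds things up, reporting the reason on each pass. A sketch of that retry shape, with a toy operation standing in for gfs2_freeze_fs():

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

static int try_freeze(int *attempts)    /* toy stand-in for gfs2_freeze_fs() */
{
    return --(*attempts) > 0 ? -EBUSY : 0;
}

int main(void)
{
    int attempts = 3, error;

    for (;;) {
        error = try_freeze(&attempts);
        if (!error)
            break;
        if (error == -EBUSY)
            fprintf(stderr, "waiting for recovery before freeze\n");
        else
            fprintf(stderr, "error freezing FS: %d\n", error);
        fprintf(stderr, "retrying...\n");
        sleep(1);
    }
    return 0;
}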
838 | |||
839 | /** | ||
840 | * gfs2_unfreeze - reallow writes to the filesystem | ||
841 | * @sb: the VFS structure for the filesystem | ||
842 | * | ||
843 | */ | ||
844 | |||
845 | static int gfs2_unfreeze(struct super_block *sb) | ||
846 | { | ||
847 | gfs2_unfreeze_fs(sb->s_fs_info); | ||
848 | return 0; | ||
849 | } | ||
850 | |||
851 | /** | ||
852 | * statfs_slow_fill - fill in the sc for a given RG | ||
853 | * @rgd: the RG | ||
854 | * @sc: the sc structure | ||
855 | * | ||
856 | * Returns: 0 (filling the sc structure cannot fail) | ||
857 | */ | ||
858 | |||
859 | static int statfs_slow_fill(struct gfs2_rgrpd *rgd, | ||
860 | struct gfs2_statfs_change_host *sc) | ||
861 | { | ||
862 | gfs2_rgrp_verify(rgd); | ||
863 | sc->sc_total += rgd->rd_data; | ||
864 | sc->sc_free += rgd->rd_free; | ||
865 | sc->sc_dinodes += rgd->rd_dinodes; | ||
866 | return 0; | ||
867 | } | ||
868 | |||
869 | /** | ||
870 | * gfs2_statfs_slow - Stat a filesystem using asynchronous locking | ||
871 | * @sdp: the filesystem | ||
872 | * @sc: the sc info that will be returned | ||
873 | * | ||
874 | * Any error (other than a signal) will cause this routine to fall back | ||
875 | * to the synchronous version. | ||
876 | * | ||
877 | * FIXME: This really shouldn't busy wait like this. | ||
878 | * | ||
879 | * Returns: errno | ||
880 | */ | ||
881 | |||
882 | static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) | ||
883 | { | ||
884 | struct gfs2_holder ri_gh; | ||
885 | struct gfs2_rgrpd *rgd_next; | ||
886 | struct gfs2_holder *gha, *gh; | ||
887 | unsigned int slots = 64; | ||
888 | unsigned int x; | ||
889 | int done; | ||
890 | int error = 0, err; | ||
891 | |||
892 | memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); | ||
893 | gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); | ||
894 | if (!gha) | ||
895 | return -ENOMEM; | ||
896 | |||
897 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
898 | if (error) | ||
899 | goto out; | ||
900 | |||
901 | rgd_next = gfs2_rgrpd_get_first(sdp); | ||
902 | |||
903 | for (;;) { | ||
904 | done = 1; | ||
905 | |||
906 | for (x = 0; x < slots; x++) { | ||
907 | gh = gha + x; | ||
908 | |||
909 | if (gh->gh_gl && gfs2_glock_poll(gh)) { | ||
910 | err = gfs2_glock_wait(gh); | ||
911 | if (err) { | ||
912 | gfs2_holder_uninit(gh); | ||
913 | error = err; | ||
914 | } else { | ||
915 | if (!error) | ||
916 | error = statfs_slow_fill( | ||
917 | gh->gh_gl->gl_object, sc); | ||
918 | gfs2_glock_dq_uninit(gh); | ||
919 | } | ||
920 | } | ||
921 | |||
922 | if (gh->gh_gl) | ||
923 | done = 0; | ||
924 | else if (rgd_next && !error) { | ||
925 | error = gfs2_glock_nq_init(rgd_next->rd_gl, | ||
926 | LM_ST_SHARED, | ||
927 | GL_ASYNC, | ||
928 | gh); | ||
929 | rgd_next = gfs2_rgrpd_get_next(rgd_next); | ||
930 | done = 0; | ||
931 | } | ||
932 | |||
933 | if (signal_pending(current)) | ||
934 | error = -ERESTARTSYS; | ||
935 | } | ||
936 | |||
937 | if (done) | ||
938 | break; | ||
939 | |||
940 | yield(); | ||
941 | } | ||
942 | |||
943 | gfs2_glock_dq_uninit(&ri_gh); | ||
944 | |||
945 | out: | ||
946 | kfree(gha); | ||
947 | return error; | ||
948 | } | ||
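gfs2_statfs_slow() keeps a window of 64 asynchronous lock requests in flight: each pass harvests completed holders, refills free slots from the remaining resource groups, and the loop ends once the window drains. A userspace model of the same slot discipline; the request type and poll_done() are stand-ins, not kernel structures.

#include <stdio.h>

enum { SLOTS = 8, TOTAL = 32 };

struct req { int in_flight; int ticks; int value; };

/* Stand-in for gfs2_glock_poll(): completes after a few passes. */
static int poll_done(struct req *r)
{
    return r->in_flight && --r->ticks <= 0;
}

int main(void)
{
    struct req slots[SLOTS] = { 0 };
    int next = 0, sum = 0, done;

    do {
        done = 1;
        for (int x = 0; x < SLOTS; x++) {
            struct req *r = &slots[x];

            if (poll_done(r)) {             /* harvest a completion */
                sum += r->value;
                r->in_flight = 0;
            }
            if (r->in_flight)
                done = 0;                   /* window not drained yet */
            else if (next < TOTAL) {        /* refill the free slot */
                r->in_flight = 1;
                r->ticks = 1 + x % 3;
                r->value = next++;
                done = 0;
            }
        }
    } while (!done);

    printf("aggregated %d requests, sum %d\n", TOTAL, sum);
    return 0;
}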
949 | |||
950 | /** | ||
951 | * gfs2_statfs_i - Do a statfs | ||
952 | * @sdp: the filesystem | ||
953 | * @sc: the statfs change structure to fill in | ||
954 | * | ||
955 | * Returns: errno | ||
956 | */ | ||
957 | |||
958 | static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) | ||
959 | { | ||
960 | struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; | ||
961 | struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; | ||
962 | |||
963 | spin_lock(&sdp->sd_statfs_spin); | ||
964 | |||
965 | *sc = *m_sc; | ||
966 | sc->sc_total += l_sc->sc_total; | ||
967 | sc->sc_free += l_sc->sc_free; | ||
968 | sc->sc_dinodes += l_sc->sc_dinodes; | ||
969 | |||
970 | spin_unlock(&sdp->sd_statfs_spin); | ||
971 | |||
972 | if (sc->sc_free < 0) | ||
973 | sc->sc_free = 0; | ||
974 | if (sc->sc_free > sc->sc_total) | ||
975 | sc->sc_free = sc->sc_total; | ||
976 | if (sc->sc_dinodes < 0) | ||
977 | sc->sc_dinodes = 0; | ||
978 | |||
979 | return 0; | ||
980 | } | ||
981 | |||
982 | /** | ||
983 | * gfs2_statfs - Gather and return stats about the filesystem | ||
984 | * @dentry: The dentry whose filesystem to stat | ||
985 | * @buf: The kstatfs buffer to fill in | ||
986 | * | ||
987 | * Returns: 0 on success or error code | ||
988 | */ | ||
989 | |||
990 | static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
991 | { | ||
992 | struct super_block *sb = dentry->d_inode->i_sb; | ||
993 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
994 | struct gfs2_statfs_change_host sc; | ||
995 | int error; | ||
996 | |||
997 | if (gfs2_tune_get(sdp, gt_statfs_slow)) | ||
998 | error = gfs2_statfs_slow(sdp, &sc); | ||
999 | else | ||
1000 | error = gfs2_statfs_i(sdp, &sc); | ||
1001 | |||
1002 | if (error) | ||
1003 | return error; | ||
1004 | |||
1005 | buf->f_type = GFS2_MAGIC; | ||
1006 | buf->f_bsize = sdp->sd_sb.sb_bsize; | ||
1007 | buf->f_blocks = sc.sc_total; | ||
1008 | buf->f_bfree = sc.sc_free; | ||
1009 | buf->f_bavail = sc.sc_free; | ||
1010 | buf->f_files = sc.sc_dinodes + sc.sc_free; | ||
1011 | buf->f_ffree = sc.sc_free; | ||
1012 | buf->f_namelen = GFS2_FNAMESIZE; | ||
1013 | |||
1014 | return 0; | ||
1015 | } | ||
1016 | |||
1017 | /** | ||
1018 | * gfs2_remount_fs - called when the FS is remounted | ||
1019 | * @sb: the filesystem | ||
1020 | * @flags: the remount flags | ||
1021 | * @data: extra data passed in (not used right now) | ||
1022 | * | ||
1023 | * Returns: errno | ||
1024 | */ | ||
1025 | |||
1026 | static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | ||
1027 | { | ||
1028 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
1029 | struct gfs2_args args = sdp->sd_args; /* Default to current settings */ | ||
1030 | struct gfs2_tune *gt = &sdp->sd_tune; | ||
1031 | int error; | ||
1032 | |||
1033 | spin_lock(>->gt_spin); | ||
1034 | args.ar_commit = gt->gt_log_flush_secs; | ||
1035 | spin_unlock(>->gt_spin); | ||
1036 | error = gfs2_mount_args(sdp, &args, data); | ||
1037 | if (error) | ||
1038 | return error; | ||
1039 | |||
1040 | /* Not allowed to change locking details */ | ||
1041 | if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) || | ||
1042 | strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) || | ||
1043 | strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata)) | ||
1044 | return -EINVAL; | ||
1045 | |||
1046 | /* Some flags must not be changed */ | ||
1047 | if (args_neq(&args, &sdp->sd_args, spectator) || | ||
1048 | args_neq(&args, &sdp->sd_args, ignore_local_fs) || | ||
1049 | args_neq(&args, &sdp->sd_args, localflocks) || | ||
1050 | args_neq(&args, &sdp->sd_args, localcaching) || | ||
1051 | args_neq(&args, &sdp->sd_args, meta)) | ||
1052 | return -EINVAL; | ||
1053 | |||
1054 | if (sdp->sd_args.ar_spectator) | ||
1055 | *flags |= MS_RDONLY; | ||
1056 | |||
1057 | if ((sb->s_flags ^ *flags) & MS_RDONLY) { | ||
1058 | if (*flags & MS_RDONLY) | ||
1059 | error = gfs2_make_fs_ro(sdp); | ||
1060 | else | ||
1061 | error = gfs2_make_fs_rw(sdp); | ||
1062 | if (error) | ||
1063 | return error; | ||
1064 | } | ||
1065 | |||
1066 | sdp->sd_args = args; | ||
1067 | if (sdp->sd_args.ar_posix_acl) | ||
1068 | sb->s_flags |= MS_POSIXACL; | ||
1069 | else | ||
1070 | sb->s_flags &= ~MS_POSIXACL; | ||
1071 | spin_lock(>->gt_spin); | ||
1072 | gt->gt_log_flush_secs = args.ar_commit; | ||
1073 | spin_unlock(>->gt_spin); | ||
1074 | |||
1075 | return 0; | ||
1076 | } | ||
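The args_neq() macro defined near the top of this file pastes the field name onto the ar_ prefix, so one macro can compare any argument field between the current and requested settings. A standalone illustration with a simplified struct:

#include <stdio.h>

struct args { int ar_debug; int ar_spectator; };

#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)

int main(void)
{
    struct args cur = { .ar_debug = 0, .ar_spectator = 1 };
    struct args req = { .ar_debug = 1, .ar_spectator = 1 };

    printf("debug changed: %d\n", args_neq(&cur, &req, debug));         /* 1 */
    printf("spectator changed: %d\n", args_neq(&cur, &req, spectator)); /* 0 */
    return 0;
}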
1077 | |||
1078 | /** | ||
1079 | * gfs2_drop_inode - Drop an inode (test for remote unlink) | ||
1080 | * @inode: The inode to drop | ||
1081 | * | ||
1082 | * If we've received a callback on an iopen lock then it's because a | ||
1083 | * remote node tried to deallocate the inode but failed due to this node | ||
1084 | * still having the inode open. Here we mark the link count zero | ||
1085 | * since we know that it must have reached zero if the GLF_DEMOTE flag | ||
1086 | * is set on the iopen glock. If we didn't do a disk read since the | ||
1087 | * remote node removed the final link then we might otherwise miss | ||
1088 | * this event. This check ensures that this node will deallocate the | ||
1089 | * inode's blocks, or alternatively pass the baton on to another | ||
1090 | * node for later deallocation. | ||
1091 | */ | ||
1092 | |||
1093 | static void gfs2_drop_inode(struct inode *inode) | ||
1094 | { | ||
1095 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1096 | |||
1097 | if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { | ||
1098 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; | ||
1099 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
1100 | clear_nlink(inode); | ||
1101 | } | ||
1102 | generic_drop_inode(inode); | ||
1103 | } | ||
1104 | |||
1105 | /** | ||
1106 | * gfs2_clear_inode - Deallocate an inode when VFS is done with it | ||
1107 | * @inode: The VFS inode | ||
1108 | * | ||
1109 | */ | ||
1110 | |||
1111 | static void gfs2_clear_inode(struct inode *inode) | ||
1112 | { | ||
1113 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1114 | |||
1115 | /* This tells us it's a "real" inode and not one which only | ||
1116 | * serves to contain an address space (see rgrp.c, meta_io.c) | ||
1117 | * which therefore doesn't have its own glocks. | ||
1118 | */ | ||
1119 | if (test_bit(GIF_USER, &ip->i_flags)) { | ||
1120 | ip->i_gl->gl_object = NULL; | ||
1121 | gfs2_glock_put(ip->i_gl); | ||
1122 | ip->i_gl = NULL; | ||
1123 | if (ip->i_iopen_gh.gh_gl) { | ||
1124 | ip->i_iopen_gh.gh_gl->gl_object = NULL; | ||
1125 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | ||
1126 | } | ||
1127 | } | ||
1128 | } | ||
1129 | |||
1130 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) | ||
1131 | { | ||
1132 | do { | ||
1133 | if (d1 == d2) | ||
1134 | return 1; | ||
1135 | d1 = d1->d_parent; | ||
1136 | } while (!IS_ROOT(d1)); | ||
1137 | return 0; | ||
1138 | } | ||
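is_ancestor() walks d_parent pointers from d1 toward the root, succeeding if it passes d2 on the way; note that, like the code above, the root itself is only matched if d1 starts there. The same walk on a toy tree, with a self-parenting node standing in for IS_ROOT():

#include <stdio.h>

struct node { struct node *parent; };

static int is_ancestor(const struct node *d1, const struct node *d2)
{
    do {
        if (d1 == d2)
            return 1;
        d1 = d1->parent;
    } while (d1->parent != d1);   /* stand-in for !IS_ROOT(d1) */
    return 0;
}

int main(void)
{
    struct node root = { &root }, dir = { &root }, leaf = { &dir };

    printf("%d\n", is_ancestor(&leaf, &dir));  /* 1 */
    printf("%d\n", is_ancestor(&dir, &leaf));  /* 0 */
    return 0;
}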
1139 | |||
1140 | /** | ||
1141 | * gfs2_show_options - Show mount options for /proc/mounts | ||
1142 | * @s: seq_file structure | ||
1143 | * @mnt: vfsmount | ||
1144 | * | ||
1145 | * Returns: 0 on success or error code | ||
1146 | */ | ||
1147 | |||
1148 | static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | ||
1149 | { | ||
1150 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; | ||
1151 | struct gfs2_args *args = &sdp->sd_args; | ||
1152 | int lfsecs; | ||
1153 | |||
1154 | if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) | ||
1155 | seq_printf(s, ",meta"); | ||
1156 | if (args->ar_lockproto[0]) | ||
1157 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | ||
1158 | if (args->ar_locktable[0]) | ||
1159 | seq_printf(s, ",locktable=%s", args->ar_locktable); | ||
1160 | if (args->ar_hostdata[0]) | ||
1161 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); | ||
1162 | if (args->ar_spectator) | ||
1163 | seq_printf(s, ",spectator"); | ||
1164 | if (args->ar_ignore_local_fs) | ||
1165 | seq_printf(s, ",ignore_local_fs"); | ||
1166 | if (args->ar_localflocks) | ||
1167 | seq_printf(s, ",localflocks"); | ||
1168 | if (args->ar_localcaching) | ||
1169 | seq_printf(s, ",localcaching"); | ||
1170 | if (args->ar_debug) | ||
1171 | seq_printf(s, ",debug"); | ||
1172 | if (args->ar_upgrade) | ||
1173 | seq_printf(s, ",upgrade"); | ||
1174 | if (args->ar_posix_acl) | ||
1175 | seq_printf(s, ",acl"); | ||
1176 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { | ||
1177 | char *state; | ||
1178 | switch (args->ar_quota) { | ||
1179 | case GFS2_QUOTA_OFF: | ||
1180 | state = "off"; | ||
1181 | break; | ||
1182 | case GFS2_QUOTA_ACCOUNT: | ||
1183 | state = "account"; | ||
1184 | break; | ||
1185 | case GFS2_QUOTA_ON: | ||
1186 | state = "on"; | ||
1187 | break; | ||
1188 | default: | ||
1189 | state = "unknown"; | ||
1190 | break; | ||
1191 | } | ||
1192 | seq_printf(s, ",quota=%s", state); | ||
1193 | } | ||
1194 | if (args->ar_suiddir) | ||
1195 | seq_printf(s, ",suiddir"); | ||
1196 | if (args->ar_data != GFS2_DATA_DEFAULT) { | ||
1197 | char *state; | ||
1198 | switch (args->ar_data) { | ||
1199 | case GFS2_DATA_WRITEBACK: | ||
1200 | state = "writeback"; | ||
1201 | break; | ||
1202 | case GFS2_DATA_ORDERED: | ||
1203 | state = "ordered"; | ||
1204 | break; | ||
1205 | default: | ||
1206 | state = "unknown"; | ||
1207 | break; | ||
1208 | } | ||
1209 | seq_printf(s, ",data=%s", state); | ||
1210 | } | ||
1211 | if (args->ar_discard) | ||
1212 | seq_printf(s, ",discard"); | ||
1213 | lfsecs = sdp->sd_tune.gt_log_flush_secs; | ||
1214 | if (lfsecs != 60) | ||
1215 | seq_printf(s, ",commit=%d", lfsecs); | ||
1216 | return 0; | ||
1217 | } | ||
1218 | |||
1219 | /* | ||
1220 | * We have to (at the moment) hold the inode's main lock to cover | ||
1221 | * the gap between unlocking the shared lock on the iopen lock and | ||
1222 | * taking the exclusive lock. I'd rather do a shared -> exclusive | ||
1223 | * conversion on the iopen lock, but we can change that later. This | ||
1224 | * is safe, just less efficient. | ||
1225 | */ | ||
1226 | |||
1227 | static void gfs2_delete_inode(struct inode *inode) | ||
1228 | { | ||
1229 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | ||
1230 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1231 | struct gfs2_holder gh; | ||
1232 | int error; | ||
1233 | |||
1234 | if (!test_bit(GIF_USER, &ip->i_flags)) | ||
1235 | goto out; | ||
1236 | |||
1237 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
1238 | if (unlikely(error)) { | ||
1239 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | ||
1240 | goto out; | ||
1241 | } | ||
1242 | |||
1243 | gfs2_glock_dq_wait(&ip->i_iopen_gh); | ||
1244 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); | ||
1245 | error = gfs2_glock_nq(&ip->i_iopen_gh); | ||
1246 | if (error) | ||
1247 | goto out_truncate; | ||
1248 | |||
1249 | if (S_ISDIR(inode->i_mode) && | ||
1250 | (ip->i_diskflags & GFS2_DIF_EXHASH)) { | ||
1251 | error = gfs2_dir_exhash_dealloc(ip); | ||
1252 | if (error) | ||
1253 | goto out_unlock; | ||
1254 | } | ||
1255 | |||
1256 | if (ip->i_eattr) { | ||
1257 | error = gfs2_ea_dealloc(ip); | ||
1258 | if (error) | ||
1259 | goto out_unlock; | ||
1260 | } | ||
1261 | |||
1262 | if (!gfs2_is_stuffed(ip)) { | ||
1263 | error = gfs2_file_dealloc(ip); | ||
1264 | if (error) | ||
1265 | goto out_unlock; | ||
1266 | } | ||
1267 | |||
1268 | error = gfs2_dinode_dealloc(ip); | ||
1269 | if (error) | ||
1270 | goto out_unlock; | ||
1271 | |||
1272 | out_truncate: | ||
1273 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | ||
1274 | if (error) | ||
1275 | goto out_unlock; | ||
1276 | /* Needs to be done before glock release & also in a transaction */ | ||
1277 | truncate_inode_pages(&inode->i_data, 0); | ||
1278 | gfs2_trans_end(sdp); | ||
1279 | |||
1280 | out_unlock: | ||
1281 | if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) | ||
1282 | gfs2_glock_dq(&ip->i_iopen_gh); | ||
1283 | gfs2_holder_uninit(&ip->i_iopen_gh); | ||
1284 | gfs2_glock_dq_uninit(&gh); | ||
1285 | if (error && error != GLR_TRYFAILED && error != -EROFS) | ||
1286 | fs_warn(sdp, "gfs2_delete_inode: %d\n", error); | ||
1287 | out: | ||
1288 | truncate_inode_pages(&inode->i_data, 0); | ||
1289 | clear_inode(inode); | ||
1290 | } | ||
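The comment preceding gfs2_delete_inode() describes covering the gap between dropping a shared hold and trying for an exclusive one by keeping a second lock held across it. A hedged pthread sketch of that shape; pthread rwlocks cannot upgrade in place, which makes the gap explicit, and all names here are illustrative.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cover = PTHREAD_MUTEX_INITIALIZER;   /* the inode's main lock */
static pthread_rwlock_t iopen = PTHREAD_RWLOCK_INITIALIZER; /* the iopen lock */

/* Drop the shared hold and try for exclusive, with the gap covered. */
static int upgrade(void)
{
    int ret;

    pthread_mutex_lock(&cover);
    pthread_rwlock_unlock(&iopen);
    ret = pthread_rwlock_trywrlock(&iopen);   /* a try-lock, as above */
    pthread_mutex_unlock(&cover);
    return ret;                               /* 0 on success, EBUSY if contended */
}

int main(void)
{
    pthread_rwlock_rdlock(&iopen);
    printf("upgrade %s\n", upgrade() == 0 ? "succeeded" : "failed");
    return 0;
}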
1291 | |||
1292 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | ||
1293 | { | ||
1294 | struct gfs2_inode *ip; | ||
1295 | |||
1296 | ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); | ||
1297 | if (ip) { | ||
1298 | ip->i_flags = 0; | ||
1299 | ip->i_gl = NULL; | ||
1300 | } | ||
1301 | return &ip->i_inode; | ||
1302 | } | ||
1303 | |||
1304 | static void gfs2_destroy_inode(struct inode *inode) | ||
1305 | { | ||
1306 | kmem_cache_free(gfs2_inode_cachep, inode); | ||
1307 | } | ||
1308 | |||
1309 | const struct super_operations gfs2_super_ops = { | ||
1310 | .alloc_inode = gfs2_alloc_inode, | ||
1311 | .destroy_inode = gfs2_destroy_inode, | ||
1312 | .write_inode = gfs2_write_inode, | ||
1313 | .delete_inode = gfs2_delete_inode, | ||
1314 | .put_super = gfs2_put_super, | ||
1315 | .sync_fs = gfs2_sync_fs, | ||
1316 | .freeze_fs = gfs2_freeze, | ||
1317 | .unfreeze_fs = gfs2_unfreeze, | ||
1318 | .statfs = gfs2_statfs, | ||
1319 | .remount_fs = gfs2_remount_fs, | ||
1320 | .clear_inode = gfs2_clear_inode, | ||
1321 | .drop_inode = gfs2_drop_inode, | ||
1322 | .show_options = gfs2_show_options, | ||
1323 | }; | ||
1324 | |||
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 7655f5025fec..23419dc3027b 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -26,6 +26,36 @@ | |||
26 | #include "util.h" | 26 | #include "util.h" |
27 | #include "glops.h" | 27 | #include "glops.h" |
28 | 28 | ||
29 | struct gfs2_attr { | ||
30 | struct attribute attr; | ||
31 | ssize_t (*show)(struct gfs2_sbd *, char *); | ||
32 | ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); | ||
33 | }; | ||
34 | |||
35 | static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr, | ||
36 | char *buf) | ||
37 | { | ||
38 | struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); | ||
39 | struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); | ||
40 | return a->show ? a->show(sdp, buf) : 0; | ||
41 | } | ||
42 | |||
43 | static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, | ||
44 | const char *buf, size_t len) | ||
45 | { | ||
46 | struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); | ||
47 | struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); | ||
48 | return a->store ? a->store(sdp, buf, len) : len; | ||
49 | } | ||
50 | |||
51 | static struct sysfs_ops gfs2_attr_ops = { | ||
52 | .show = gfs2_attr_show, | ||
53 | .store = gfs2_attr_store, | ||
54 | }; | ||
55 | |||
56 | |||
57 | static struct kset *gfs2_kset; | ||
58 | |||
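gfs2_attr_show() and gfs2_attr_store(), hoisted here so the whole file can share them, recover the filesystem and the attribute from the embedded kobject/attribute pointers with container_of(), then dispatch through the per-attribute callbacks. A self-contained userspace illustration of that dispatch; the types are simplified stand-ins for the sysfs ones.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct attribute { const char *name; };

struct my_attr {
    struct attribute attr;
    int (*show)(void);          /* per-attribute callback */
};

static int answer_show(void) { return 42; }

static struct my_attr demo = { { "answer" }, answer_show };

int main(void)
{
    struct attribute *a = &demo.attr;  /* what sysfs hands the ops callback */
    struct my_attr *m = container_of(a, struct my_attr, attr);

    printf("%s = %d\n", m->attr.name, m->show ? m->show() : 0);
    return 0;
}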
29 | static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) | 59 | static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) |
30 | { | 60 | { |
31 | return snprintf(buf, PAGE_SIZE, "%u:%u\n", | 61 | return snprintf(buf, PAGE_SIZE, "%u:%u\n", |
@@ -212,11 +242,6 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len | |||
212 | return len; | 242 | return len; |
213 | } | 243 | } |
214 | 244 | ||
215 | struct gfs2_attr { | ||
216 | struct attribute attr; | ||
217 | ssize_t (*show)(struct gfs2_sbd *, char *); | ||
218 | ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); | ||
219 | }; | ||
220 | 245 | ||
221 | #define GFS2_ATTR(name, mode, show, store) \ | 246 | #define GFS2_ATTR(name, mode, show, store) \ |
222 | static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store) | 247 | static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store) |
@@ -246,58 +271,11 @@ static struct attribute *gfs2_attrs[] = { | |||
246 | NULL, | 271 | NULL, |
247 | }; | 272 | }; |
248 | 273 | ||
249 | static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr, | ||
250 | char *buf) | ||
251 | { | ||
252 | struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); | ||
253 | struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); | ||
254 | return a->show ? a->show(sdp, buf) : 0; | ||
255 | } | ||
256 | |||
257 | static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, | ||
258 | const char *buf, size_t len) | ||
259 | { | ||
260 | struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); | ||
261 | struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); | ||
262 | return a->store ? a->store(sdp, buf, len) : len; | ||
263 | } | ||
264 | |||
265 | static struct sysfs_ops gfs2_attr_ops = { | ||
266 | .show = gfs2_attr_show, | ||
267 | .store = gfs2_attr_store, | ||
268 | }; | ||
269 | |||
270 | static struct kobj_type gfs2_ktype = { | 274 | static struct kobj_type gfs2_ktype = { |
271 | .default_attrs = gfs2_attrs, | 275 | .default_attrs = gfs2_attrs, |
272 | .sysfs_ops = &gfs2_attr_ops, | 276 | .sysfs_ops = &gfs2_attr_ops, |
273 | }; | 277 | }; |
274 | 278 | ||
275 | static struct kset *gfs2_kset; | ||
276 | |||
277 | /* | ||
278 | * display struct lm_lockstruct fields | ||
279 | */ | ||
280 | |||
281 | struct lockstruct_attr { | ||
282 | struct attribute attr; | ||
283 | ssize_t (*show)(struct gfs2_sbd *, char *); | ||
284 | }; | ||
285 | |||
286 | #define LOCKSTRUCT_ATTR(name, fmt) \ | ||
287 | static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ | ||
288 | { \ | ||
289 | return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \ | ||
290 | } \ | ||
291 | static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name) | ||
292 | |||
293 | LOCKSTRUCT_ATTR(jid, "%u\n"); | ||
294 | LOCKSTRUCT_ATTR(first, "%u\n"); | ||
295 | |||
296 | static struct attribute *lockstruct_attrs[] = { | ||
297 | &lockstruct_attr_jid.attr, | ||
298 | &lockstruct_attr_first.attr, | ||
299 | NULL, | ||
300 | }; | ||
301 | 279 | ||
302 | /* | 280 | /* |
303 | * lock_module. Originally from lock_dlm | 281 | * lock_module. Originally from lock_dlm |
@@ -359,34 +337,33 @@ static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) | |||
359 | return sprintf(buf, "%d\n", ls->ls_first_done); | 337 | return sprintf(buf, "%d\n", ls->ls_first_done); |
360 | } | 338 | } |
361 | 339 | ||
362 | static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf) | 340 | static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) |
363 | { | ||
364 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
365 | return sprintf(buf, "%d\n", ls->ls_recover_jid); | ||
366 | } | ||
367 | |||
368 | static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid) | ||
369 | { | 341 | { |
342 | unsigned jid; | ||
370 | struct gfs2_jdesc *jd; | 343 | struct gfs2_jdesc *jd; |
344 | int rv; | ||
345 | |||
346 | rv = sscanf(buf, "%u", &jid); | ||
347 | if (rv != 1) | ||
348 | return -EINVAL; | ||
371 | 349 | ||
350 | rv = -ESHUTDOWN; | ||
372 | spin_lock(&sdp->sd_jindex_spin); | 351 | spin_lock(&sdp->sd_jindex_spin); |
352 | if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) | ||
353 | goto out; | ||
354 | rv = -EBUSY; | ||
355 | if (sdp->sd_jdesc->jd_jid == jid) | ||
356 | goto out; | ||
357 | rv = -ENOENT; | ||
373 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | 358 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { |
374 | if (jd->jd_jid != jid) | 359 | if (jd->jd_jid != jid) |
375 | continue; | 360 | continue; |
376 | jd->jd_dirty = 1; | 361 | rv = slow_work_enqueue(&jd->jd_work); |
377 | break; | 362 | break; |
378 | } | 363 | } |
364 | out: | ||
379 | spin_unlock(&sdp->sd_jindex_spin); | 365 | spin_unlock(&sdp->sd_jindex_spin); |
380 | } | 366 | return rv ? rv : len; |
381 | |||
382 | static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | ||
383 | { | ||
384 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
385 | ls->ls_recover_jid = simple_strtol(buf, NULL, 0); | ||
386 | gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid); | ||
387 | if (sdp->sd_recoverd_process) | ||
388 | wake_up_process(sdp->sd_recoverd_process); | ||
389 | return len; | ||
390 | } | 367 | } |
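The rewritten recover_store() stages its error code: rv is preloaded with the errno the next check would deserve, failures jump to the single unlock point, and "return rv ? rv : len" reports success as the number of bytes consumed. A userspace sketch of that control flow, locking elided and the enqueue step reduced to setting rv to zero:

#include <errno.h>
#include <stdio.h>

static long store(const char *buf, long len, int no_recovery, unsigned own_jid)
{
    unsigned jid;
    long rv;

    if (sscanf(buf, "%u", &jid) != 1)
        return -EINVAL;

    rv = -ESHUTDOWN;          /* errno if the next check fires */
    if (no_recovery)
        goto out;
    rv = -EBUSY;              /* cannot recover our own journal */
    if (jid == own_jid)
        goto out;
    rv = 0;                   /* found: work would be enqueued here */
out:
    return rv ? rv : len;     /* success reports bytes consumed */
}

int main(void)
{
    printf("%ld\n", store("3", 2, 0, 0));  /* 2 */
    printf("%ld\n", store("3", 2, 0, 3));  /* -16 (-EBUSY) */
    return 0;
}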
391 | 368 | ||
392 | static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf) | 369 | static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf) |
@@ -401,31 +378,31 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) | |||
401 | return sprintf(buf, "%d\n", ls->ls_recover_jid_status); | 378 | return sprintf(buf, "%d\n", ls->ls_recover_jid_status); |
402 | } | 379 | } |
403 | 380 | ||
404 | struct gdlm_attr { | 381 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) |
405 | struct attribute attr; | 382 | { |
406 | ssize_t (*show)(struct gfs2_sbd *sdp, char *); | 383 | return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); |
407 | ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t); | 384 | } |
408 | }; | ||
409 | 385 | ||
410 | #define GDLM_ATTR(_name,_mode,_show,_store) \ | 386 | #define GDLM_ATTR(_name,_mode,_show,_store) \ |
411 | static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) | 387 | static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) |
412 | 388 | ||
413 | GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); | 389 | GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); |
414 | GDLM_ATTR(block, 0644, block_show, block_store); | 390 | GDLM_ATTR(block, 0644, block_show, block_store); |
415 | GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); | 391 | GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); |
416 | GDLM_ATTR(id, 0444, lkid_show, NULL); | 392 | GDLM_ATTR(id, 0444, lkid_show, NULL); |
417 | GDLM_ATTR(first, 0444, lkfirst_show, NULL); | 393 | GDLM_ATTR(jid, 0444, jid_show, NULL); |
418 | GDLM_ATTR(first_done, 0444, first_done_show, NULL); | 394 | GDLM_ATTR(first, 0444, lkfirst_show, NULL); |
419 | GDLM_ATTR(recover, 0644, recover_show, recover_store); | 395 | GDLM_ATTR(first_done, 0444, first_done_show, NULL); |
420 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); | 396 | GDLM_ATTR(recover, 0200, NULL, recover_store); |
421 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | 397 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); |
398 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | ||
422 | 399 | ||
423 | static struct attribute *lock_module_attrs[] = { | 400 | static struct attribute *lock_module_attrs[] = { |
424 | &gdlm_attr_proto_name.attr, | 401 | &gdlm_attr_proto_name.attr, |
425 | &gdlm_attr_block.attr, | 402 | &gdlm_attr_block.attr, |
426 | &gdlm_attr_withdraw.attr, | 403 | &gdlm_attr_withdraw.attr, |
427 | &gdlm_attr_id.attr, | 404 | &gdlm_attr_id.attr, |
428 | &lockstruct_attr_jid.attr, | 405 | &gdlm_attr_jid.attr, |
429 | &gdlm_attr_first.attr, | 406 | &gdlm_attr_first.attr, |
430 | &gdlm_attr_first_done.attr, | 407 | &gdlm_attr_first_done.attr, |
431 | &gdlm_attr_recover.attr, | 408 | &gdlm_attr_recover.attr, |
@@ -435,53 +412,6 @@ static struct attribute *lock_module_attrs[] = { | |||
435 | }; | 412 | }; |
436 | 413 | ||
437 | /* | 414 | /* |
438 | * display struct gfs2_args fields | ||
439 | */ | ||
440 | |||
441 | struct args_attr { | ||
442 | struct attribute attr; | ||
443 | ssize_t (*show)(struct gfs2_sbd *, char *); | ||
444 | }; | ||
445 | |||
446 | #define ARGS_ATTR(name, fmt) \ | ||
447 | static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ | ||
448 | { \ | ||
449 | return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \ | ||
450 | } \ | ||
451 | static struct args_attr args_attr_##name = __ATTR_RO(name) | ||
452 | |||
453 | ARGS_ATTR(lockproto, "%s\n"); | ||
454 | ARGS_ATTR(locktable, "%s\n"); | ||
455 | ARGS_ATTR(hostdata, "%s\n"); | ||
456 | ARGS_ATTR(spectator, "%d\n"); | ||
457 | ARGS_ATTR(ignore_local_fs, "%d\n"); | ||
458 | ARGS_ATTR(localcaching, "%d\n"); | ||
459 | ARGS_ATTR(localflocks, "%d\n"); | ||
460 | ARGS_ATTR(debug, "%d\n"); | ||
461 | ARGS_ATTR(upgrade, "%d\n"); | ||
462 | ARGS_ATTR(posix_acl, "%d\n"); | ||
463 | ARGS_ATTR(quota, "%u\n"); | ||
464 | ARGS_ATTR(suiddir, "%d\n"); | ||
465 | ARGS_ATTR(data, "%d\n"); | ||
466 | |||
467 | static struct attribute *args_attrs[] = { | ||
468 | &args_attr_lockproto.attr, | ||
469 | &args_attr_locktable.attr, | ||
470 | &args_attr_hostdata.attr, | ||
471 | &args_attr_spectator.attr, | ||
472 | &args_attr_ignore_local_fs.attr, | ||
473 | &args_attr_localcaching.attr, | ||
474 | &args_attr_localflocks.attr, | ||
475 | &args_attr_debug.attr, | ||
476 | &args_attr_upgrade.attr, | ||
477 | &args_attr_posix_acl.attr, | ||
478 | &args_attr_quota.attr, | ||
479 | &args_attr_suiddir.attr, | ||
480 | &args_attr_data.attr, | ||
481 | NULL, | ||
482 | }; | ||
483 | |||
484 | /* | ||
485 | * get and set struct gfs2_tune fields | 415 | * get and set struct gfs2_tune fields |
486 | */ | 416 | */ |
487 | 417 | ||
@@ -531,14 +461,8 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field, | |||
531 | return len; | 461 | return len; |
532 | } | 462 | } |
533 | 463 | ||
534 | struct tune_attr { | ||
535 | struct attribute attr; | ||
536 | ssize_t (*show)(struct gfs2_sbd *, char *); | ||
537 | ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); | ||
538 | }; | ||
539 | |||
540 | #define TUNE_ATTR_3(name, show, store) \ | 464 | #define TUNE_ATTR_3(name, show, store) \ |
541 | static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store) | 465 | static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store) |
542 | 466 | ||
543 | #define TUNE_ATTR_2(name, store) \ | 467 | #define TUNE_ATTR_2(name, store) \ |
544 | static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ | 468 | static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ |
@@ -554,15 +478,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ | |||
554 | } \ | 478 | } \ |
555 | TUNE_ATTR_2(name, name##_store) | 479 | TUNE_ATTR_2(name, name##_store) |
556 | 480 | ||
557 | #define TUNE_ATTR_DAEMON(name, process) \ | ||
558 | static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ | ||
559 | { \ | ||
560 | ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \ | ||
561 | wake_up_process(sdp->sd_##process); \ | ||
562 | return r; \ | ||
563 | } \ | ||
564 | TUNE_ATTR_2(name, name##_store) | ||
565 | |||
566 | TUNE_ATTR(incore_log_blocks, 0); | 481 | TUNE_ATTR(incore_log_blocks, 0); |
567 | TUNE_ATTR(log_flush_secs, 0); | 482 | TUNE_ATTR(log_flush_secs, 0); |
568 | TUNE_ATTR(quota_warn_period, 0); | 483 | TUNE_ATTR(quota_warn_period, 0); |
@@ -574,8 +489,6 @@ TUNE_ATTR(new_files_jdata, 0); | |||
574 | TUNE_ATTR(quota_simul_sync, 1); | 489 | TUNE_ATTR(quota_simul_sync, 1); |
575 | TUNE_ATTR(stall_secs, 1); | 490 | TUNE_ATTR(stall_secs, 1); |
576 | TUNE_ATTR(statfs_quantum, 1); | 491 | TUNE_ATTR(statfs_quantum, 1); |
577 | TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); | ||
578 | TUNE_ATTR_DAEMON(logd_secs, logd_process); | ||
579 | TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); | 492 | TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); |
580 | 493 | ||
581 | static struct attribute *tune_attrs[] = { | 494 | static struct attribute *tune_attrs[] = { |
@@ -589,23 +502,11 @@ static struct attribute *tune_attrs[] = { | |||
589 | &tune_attr_quota_simul_sync.attr, | 502 | &tune_attr_quota_simul_sync.attr, |
590 | &tune_attr_stall_secs.attr, | 503 | &tune_attr_stall_secs.attr, |
591 | &tune_attr_statfs_quantum.attr, | 504 | &tune_attr_statfs_quantum.attr, |
592 | &tune_attr_recoverd_secs.attr, | ||
593 | &tune_attr_logd_secs.attr, | ||
594 | &tune_attr_quota_scale.attr, | 505 | &tune_attr_quota_scale.attr, |
595 | &tune_attr_new_files_jdata.attr, | 506 | &tune_attr_new_files_jdata.attr, |
596 | NULL, | 507 | NULL, |
597 | }; | 508 | }; |
598 | 509 | ||
599 | static struct attribute_group lockstruct_group = { | ||
600 | .name = "lockstruct", | ||
601 | .attrs = lockstruct_attrs, | ||
602 | }; | ||
603 | |||
604 | static struct attribute_group args_group = { | ||
605 | .name = "args", | ||
606 | .attrs = args_attrs, | ||
607 | }; | ||
608 | |||
609 | static struct attribute_group tune_group = { | 510 | static struct attribute_group tune_group = { |
610 | .name = "tune", | 511 | .name = "tune", |
611 | .attrs = tune_attrs, | 512 | .attrs = tune_attrs, |
@@ -626,17 +527,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | |||
626 | if (error) | 527 | if (error) |
627 | goto fail; | 528 | goto fail; |
628 | 529 | ||
629 | error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group); | ||
630 | if (error) | ||
631 | goto fail_reg; | ||
632 | |||
633 | error = sysfs_create_group(&sdp->sd_kobj, &args_group); | ||
634 | if (error) | ||
635 | goto fail_lockstruct; | ||
636 | |||
637 | error = sysfs_create_group(&sdp->sd_kobj, &tune_group); | 530 | error = sysfs_create_group(&sdp->sd_kobj, &tune_group); |
638 | if (error) | 531 | if (error) |
639 | goto fail_args; | 532 | goto fail_reg; |
640 | 533 | ||
641 | error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group); | 534 | error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group); |
642 | if (error) | 535 | if (error) |
@@ -647,10 +540,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | |||
647 | 540 | ||
648 | fail_tune: | 541 | fail_tune: |
649 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); | 542 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); |
650 | fail_args: | ||
651 | sysfs_remove_group(&sdp->sd_kobj, &args_group); | ||
652 | fail_lockstruct: | ||
653 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); | ||
654 | fail_reg: | 543 | fail_reg: |
655 | kobject_put(&sdp->sd_kobj); | 544 | kobject_put(&sdp->sd_kobj); |
656 | fail: | 545 | fail: |
@@ -661,8 +550,6 @@ fail: | |||
661 | void gfs2_sys_fs_del(struct gfs2_sbd *sdp) | 550 | void gfs2_sys_fs_del(struct gfs2_sbd *sdp) |
662 | { | 551 | { |
663 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); | 552 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); |
664 | sysfs_remove_group(&sdp->sd_kobj, &args_group); | ||
665 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); | ||
666 | sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); | 553 | sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); |
667 | kobject_put(&sdp->sd_kobj); | 554 | kobject_put(&sdp->sd_kobj); |
668 | } | 555 | } |
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h new file mode 100644 index 000000000000..98d6ef1c1dc0 --- /dev/null +++ b/fs/gfs2/trace_gfs2.h | |||
@@ -0,0 +1,407 @@ | |||
1 | #if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ) | ||
2 | #define _TRACE_GFS2_H | ||
3 | |||
4 | #include <linux/tracepoint.h> | ||
5 | |||
6 | #undef TRACE_SYSTEM | ||
7 | #define TRACE_SYSTEM gfs2 | ||
8 | #define TRACE_INCLUDE_FILE trace_gfs2 | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/buffer_head.h> | ||
12 | #include <linux/dlmconstants.h> | ||
13 | #include <linux/gfs2_ondisk.h> | ||
14 | #include "incore.h" | ||
15 | #include "glock.h" | ||
16 | |||
17 | #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } | ||
18 | #define glock_trace_name(x) __print_symbolic(x, \ | ||
19 | dlm_state_name(IV), \ | ||
20 | dlm_state_name(NL), \ | ||
21 | dlm_state_name(CR), \ | ||
22 | dlm_state_name(CW), \ | ||
23 | dlm_state_name(PR), \ | ||
24 | dlm_state_name(PW), \ | ||
25 | dlm_state_name(EX)) | ||
26 | |||
27 | #define block_state_name(x) __print_symbolic(x, \ | ||
28 | { GFS2_BLKST_FREE, "free" }, \ | ||
29 | { GFS2_BLKST_USED, "used" }, \ | ||
30 | { GFS2_BLKST_DINODE, "dinode" }, \ | ||
31 | { GFS2_BLKST_UNLINKED, "unlinked" }) | ||
32 | |||
33 | #define show_glock_flags(flags) __print_flags(flags, "", \ | ||
34 | {(1UL << GLF_LOCK), "l" }, \ | ||
35 | {(1UL << GLF_DEMOTE), "D" }, \ | ||
36 | {(1UL << GLF_PENDING_DEMOTE), "d" }, \ | ||
37 | {(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \ | ||
38 | {(1UL << GLF_DIRTY), "y" }, \ | ||
39 | {(1UL << GLF_LFLUSH), "f" }, \ | ||
40 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ | ||
41 | {(1UL << GLF_REPLY_PENDING), "r" }, \ | ||
42 | {(1UL << GLF_INITIAL), "I" }, \ | ||
43 | {(1UL << GLF_FROZEN), "F" }) | ||
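show_glock_flags() renders each set bit as a single letter via __print_flags(). The equivalent table walk in plain C; the three entries are a subset chosen for illustration, and the bit positions are stand-ins rather than the real GLF_* values.

#include <stddef.h>
#include <stdio.h>

static const struct { unsigned long bit; char c; } flag_tbl[] = {
    { 1UL << 0, 'l' },   /* stands in for GLF_LOCK */
    { 1UL << 1, 'D' },   /* stands in for GLF_DEMOTE */
    { 1UL << 2, 'd' },   /* stands in for GLF_PENDING_DEMOTE */
};

static void show_flags(unsigned long flags, char *buf)
{
    for (size_t i = 0; i < sizeof(flag_tbl) / sizeof(flag_tbl[0]); i++)
        if (flags & flag_tbl[i].bit)
            *buf++ = flag_tbl[i].c;
    *buf = '\0';
}

int main(void)
{
    char buf[8];

    show_flags((1UL << 0) | (1UL << 2), buf);
    printf("flags:%s\n", buf);   /* flags:ld */
    return 0;
}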
44 | |||
45 | #ifndef NUMPTY | ||
46 | #define NUMPTY | ||
47 | static inline u8 glock_trace_state(unsigned int state) | ||
48 | { | ||
49 | switch(state) { | ||
50 | case LM_ST_SHARED: | ||
51 | return DLM_LOCK_PR; | ||
52 | case LM_ST_DEFERRED: | ||
53 | return DLM_LOCK_CW; | ||
54 | case LM_ST_EXCLUSIVE: | ||
55 | return DLM_LOCK_EX; | ||
56 | } | ||
57 | return DLM_LOCK_NL; | ||
58 | } | ||
59 | #endif | ||
60 | |||
61 | /* Section 1 - Locking | ||
62 | * | ||
63 | * Objectives: | ||
64 | * Latency: Remote demote request to state change | ||
65 | * Latency: Local lock request to state change | ||
66 | * Latency: State change to lock grant | ||
67 | * Correctness: Ordering of local lock state vs. I/O requests | ||
68 | * Correctness: Responses to remote demote requests | ||
69 | */ | ||
70 | |||
71 | /* General glock state change (DLM lock request completes) */ | ||
72 | TRACE_EVENT(gfs2_glock_state_change, | ||
73 | |||
74 | TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state), | ||
75 | |||
76 | TP_ARGS(gl, new_state), | ||
77 | |||
78 | TP_STRUCT__entry( | ||
79 | __field( dev_t, dev ) | ||
80 | __field( u64, glnum ) | ||
81 | __field( u32, gltype ) | ||
82 | __field( u8, cur_state ) | ||
83 | __field( u8, new_state ) | ||
84 | __field( u8, dmt_state ) | ||
85 | __field( u8, tgt_state ) | ||
86 | __field( unsigned long, flags ) | ||
87 | ), | ||
88 | |||
89 | TP_fast_assign( | ||
90 | __entry->dev = gl->gl_sbd->sd_vfs->s_dev; | ||
91 | __entry->glnum = gl->gl_name.ln_number; | ||
92 | __entry->gltype = gl->gl_name.ln_type; | ||
93 | __entry->cur_state = glock_trace_state(gl->gl_state); | ||
94 | __entry->new_state = glock_trace_state(new_state); | ||
95 | __entry->tgt_state = glock_trace_state(gl->gl_target); | ||
96 | __entry->dmt_state = glock_trace_state(gl->gl_demote_state); | ||
97 | __entry->flags = gl->gl_flags; | ||
98 | ), | ||
99 | |||
100 | TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", | ||
101 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, | ||
102 | (unsigned long long)__entry->glnum, | ||
103 | glock_trace_name(__entry->cur_state), | ||
104 | glock_trace_name(__entry->new_state), | ||
105 | glock_trace_name(__entry->tgt_state), | ||
106 | glock_trace_name(__entry->dmt_state), | ||
107 | show_glock_flags(__entry->flags)) | ||
108 | ); | ||
109 | |||
110 | /* State change -> unlocked, glock is being deallocated */ | ||
111 | TRACE_EVENT(gfs2_glock_put, | ||
112 | |||
113 | TP_PROTO(const struct gfs2_glock *gl), | ||
114 | |||
115 | TP_ARGS(gl), | ||
116 | |||
117 | TP_STRUCT__entry( | ||
118 | __field( dev_t, dev ) | ||
119 | __field( u64, glnum ) | ||
120 | __field( u32, gltype ) | ||
121 | __field( u8, cur_state ) | ||
122 | __field( unsigned long, flags ) | ||
123 | ), | ||
124 | |||
125 | TP_fast_assign( | ||
126 | __entry->dev = gl->gl_sbd->sd_vfs->s_dev; | ||
127 | __entry->gltype = gl->gl_name.ln_type; | ||
128 | __entry->glnum = gl->gl_name.ln_number; | ||
129 | __entry->cur_state = glock_trace_state(gl->gl_state); | ||
130 | __entry->flags = gl->gl_flags; | ||
131 | ), | ||
132 | |||
133 | TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", | ||
134 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
135 | __entry->gltype, (unsigned long long)__entry->glnum, | ||
136 | glock_trace_name(__entry->cur_state), | ||
137 | glock_trace_name(DLM_LOCK_IV), | ||
138 | show_glock_flags(__entry->flags)) | ||
139 | |||
140 | ); | ||
141 | |||
142 | /* Callback (local or remote) requesting lock demotion */ | ||
143 | TRACE_EVENT(gfs2_demote_rq, | ||
144 | |||
145 | TP_PROTO(const struct gfs2_glock *gl), | ||
146 | |||
147 | TP_ARGS(gl), | ||
148 | |||
149 | TP_STRUCT__entry( | ||
150 | __field( dev_t, dev ) | ||
151 | __field( u64, glnum ) | ||
152 | __field( u32, gltype ) | ||
153 | __field( u8, cur_state ) | ||
154 | __field( u8, dmt_state ) | ||
155 | __field( unsigned long, flags ) | ||
156 | ), | ||
157 | |||
158 | TP_fast_assign( | ||
159 | __entry->dev = gl->gl_sbd->sd_vfs->s_dev; | ||
160 | __entry->gltype = gl->gl_name.ln_type; | ||
161 | __entry->glnum = gl->gl_name.ln_number; | ||
162 | __entry->cur_state = glock_trace_state(gl->gl_state); | ||
163 | __entry->dmt_state = glock_trace_state(gl->gl_demote_state); | ||
164 | __entry->flags = gl->gl_flags; | ||
165 | ), | ||
166 | |||
167 | TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", | ||
168 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, | ||
169 | (unsigned long long)__entry->glnum, | ||
170 | glock_trace_name(__entry->cur_state), | ||
171 | glock_trace_name(__entry->dmt_state), | ||
172 | show_glock_flags(__entry->flags)) | ||
173 | |||
174 | ); | ||
175 | |||
176 | /* Promotion/grant of a glock */ | ||
177 | TRACE_EVENT(gfs2_promote, | ||
178 | |||
179 | TP_PROTO(const struct gfs2_holder *gh, int first), | ||
180 | |||
181 | TP_ARGS(gh, first), | ||
182 | |||
183 | TP_STRUCT__entry( | ||
184 | __field( dev_t, dev ) | ||
185 | __field( u64, glnum ) | ||
186 | __field( u32, gltype ) | ||
187 | __field( int, first ) | ||
188 | __field( u8, state ) | ||
189 | ), | ||
190 | |||
191 | TP_fast_assign( | ||
192 | __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; | ||
193 | __entry->glnum = gh->gh_gl->gl_name.ln_number; | ||
194 | __entry->gltype = gh->gh_gl->gl_name.ln_type; | ||
195 | __entry->first = first; | ||
196 | __entry->state = glock_trace_state(gh->gh_state); | ||
197 | ), | ||
198 | |||
199 | TP_printk("%u,%u glock %u:%llu promote %s %s", | ||
200 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, | ||
201 | (unsigned long long)__entry->glnum, | ||
202 | __entry->first ? "first": "other", | ||
203 | glock_trace_name(__entry->state)) | ||
204 | ); | ||
205 | |||
206 | /* Queue/dequeue a lock request */ | ||
207 | TRACE_EVENT(gfs2_glock_queue, | ||
208 | |||
209 | TP_PROTO(const struct gfs2_holder *gh, int queue), | ||
210 | |||
211 | TP_ARGS(gh, queue), | ||
212 | |||
213 | TP_STRUCT__entry( | ||
214 | __field( dev_t, dev ) | ||
215 | __field( u64, glnum ) | ||
216 | __field( u32, gltype ) | ||
217 | __field( int, queue ) | ||
218 | __field( u8, state ) | ||
219 | ), | ||
220 | |||
221 | TP_fast_assign( | ||
222 | __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; | ||
223 | __entry->glnum = gh->gh_gl->gl_name.ln_number; | ||
224 | __entry->gltype = gh->gh_gl->gl_name.ln_type; | ||
225 | __entry->queue = queue; | ||
226 | __entry->state = glock_trace_state(gh->gh_state); | ||
227 | ), | ||
228 | |||
229 | TP_printk("%u,%u glock %u:%llu %squeue %s", | ||
230 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, | ||
231 | (unsigned long long)__entry->glnum, | ||
232 | __entry->queue ? "" : "de", | ||
233 | glock_trace_name(__entry->state)) | ||
234 | ); | ||
235 | |||
236 | /* Section 2 - Log/journal | ||
237 | * | ||
238 | * Objectives: | ||
239 | * Latency: Log flush time | ||
240 | * Correctness: pin/unpin vs. disk I/O ordering | ||
241 | * Performance: Log usage stats | ||
242 | */ | ||
243 | |||
244 | /* Pin/unpin a block in the log */ | ||
245 | TRACE_EVENT(gfs2_pin, | ||
246 | |||
247 | TP_PROTO(const struct gfs2_bufdata *bd, int pin), | ||
248 | |||
249 | TP_ARGS(bd, pin), | ||
250 | |||
251 | TP_STRUCT__entry( | ||
252 | __field( dev_t, dev ) | ||
253 | __field( int, pin ) | ||
254 | __field( u32, len ) | ||
255 | __field( sector_t, block ) | ||
256 | __field( u64, ino ) | ||
257 | ), | ||
258 | |||
259 | TP_fast_assign( | ||
260 | __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev; | ||
261 | __entry->pin = pin; | ||
262 | __entry->len = bd->bd_bh->b_size; | ||
263 | __entry->block = bd->bd_bh->b_blocknr; | ||
264 | __entry->ino = bd->bd_gl->gl_name.ln_number; | ||
265 | ), | ||
266 | |||
267 | TP_printk("%u,%u log %s %llu/%lu inode %llu", | ||
268 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
269 | __entry->pin ? "pin" : "unpin", | ||
270 | (unsigned long long)__entry->block, | ||
271 | (unsigned long)__entry->len, | ||
272 | (unsigned long long)__entry->ino) | ||
273 | ); | ||
274 | |||
275 | /* Flushing the log */ | ||
276 | TRACE_EVENT(gfs2_log_flush, | ||
277 | |||
278 | TP_PROTO(const struct gfs2_sbd *sdp, int start), | ||
279 | |||
280 | TP_ARGS(sdp, start), | ||
281 | |||
282 | TP_STRUCT__entry( | ||
283 | __field( dev_t, dev ) | ||
284 | __field( int, start ) | ||
285 | __field( u64, log_seq ) | ||
286 | ), | ||
287 | |||
288 | TP_fast_assign( | ||
289 | __entry->dev = sdp->sd_vfs->s_dev; | ||
290 | __entry->start = start; | ||
291 | __entry->log_seq = sdp->sd_log_sequence; | ||
292 | ), | ||
293 | |||
294 | TP_printk("%u,%u log flush %s %llu", | ||
295 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
296 | __entry->start ? "start" : "end", | ||
297 | (unsigned long long)__entry->log_seq) | ||
298 | ); | ||
299 | |||
300 | /* Reserving/releasing blocks in the log */ | ||
301 | TRACE_EVENT(gfs2_log_blocks, | ||
302 | |||
303 | TP_PROTO(const struct gfs2_sbd *sdp, int blocks), | ||
304 | |||
305 | TP_ARGS(sdp, blocks), | ||
306 | |||
307 | TP_STRUCT__entry( | ||
308 | __field( dev_t, dev ) | ||
309 | __field( int, blocks ) | ||
310 | ), | ||
311 | |||
312 | TP_fast_assign( | ||
313 | __entry->dev = sdp->sd_vfs->s_dev; | ||
314 | __entry->blocks = blocks; | ||
315 | ), | ||
316 | |||
317 | TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev), | ||
318 | MINOR(__entry->dev), __entry->blocks) | ||
319 | ); | ||
320 | |||
321 | /* Section 3 - bmap | ||
322 | * | ||
323 | * Objectives: | ||
324 | * Latency: Bmap request time | ||
325 | * Performance: Block allocator tracing | ||
326 | * Correctness: Test of discard generation vs. blocks allocated | ||
327 | */ | ||
328 | |||
329 | /* Map an extent of blocks, possibly a new allocation */ | ||
330 | TRACE_EVENT(gfs2_bmap, | ||
331 | |||
332 | TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh, | ||
333 | sector_t lblock, int create, int errno), | ||
334 | |||
335 | TP_ARGS(ip, bh, lblock, create, errno), | ||
336 | |||
337 | TP_STRUCT__entry( | ||
338 | __field( dev_t, dev ) | ||
339 | __field( sector_t, lblock ) | ||
340 | __field( sector_t, pblock ) | ||
341 | __field( u64, inum ) | ||
342 | __field( unsigned long, state ) | ||
343 | __field( u32, len ) | ||
344 | __field( int, create ) | ||
345 | __field( int, errno ) | ||
346 | ), | ||
347 | |||
348 | TP_fast_assign( | ||
349 | __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; | ||
350 | __entry->lblock = lblock; | ||
351 | __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0; | ||
352 | __entry->inum = ip->i_no_addr; | ||
353 | __entry->state = bh->b_state; | ||
354 | __entry->len = bh->b_size; | ||
355 | __entry->create = create; | ||
356 | __entry->errno = errno; | ||
357 | ), | ||
358 | |||
359 | TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d", | ||
360 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
361 | (unsigned long long)__entry->inum, | ||
362 | (unsigned long long)__entry->lblock, | ||
363 | (unsigned long)__entry->len, | ||
364 | (unsigned long long)__entry->pblock, | ||
365 | __entry->state, __entry->create ? "create " : "nocreate", | ||
366 | __entry->errno) | ||
367 | ); | ||
368 | |||
369 | /* Keep track of blocks as they are allocated/freed */ | ||
370 | TRACE_EVENT(gfs2_block_alloc, | ||
371 | |||
372 | TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len, | ||
373 | u8 block_state), | ||
374 | |||
375 | TP_ARGS(ip, block, len, block_state), | ||
376 | |||
377 | TP_STRUCT__entry( | ||
378 | __field( dev_t, dev ) | ||
379 | __field( u64, start ) | ||
380 | __field( u64, inum ) | ||
381 | __field( u32, len ) | ||
382 | __field( u8, block_state ) | ||
383 | ), | ||
384 | |||
385 | TP_fast_assign( | ||
386 | __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; | ||
387 | __entry->start = block; | ||
388 | __entry->inum = ip->i_no_addr; | ||
389 | __entry->len = len; | ||
390 | __entry->block_state = block_state; | ||
391 | ), | ||
392 | |||
393 | TP_printk("%u,%u bmap %llu alloc %llu/%lu %s", | ||
394 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
395 | (unsigned long long)__entry->inum, | ||
396 | (unsigned long long)__entry->start, | ||
397 | (unsigned long)__entry->len, | ||
398 | block_state_name(__entry->block_state)) | ||
399 | ); | ||
400 | |||
401 | #endif /* _TRACE_GFS2_H */ | ||
402 | |||
403 | /* This part must be outside protection */ | ||
404 | #undef TRACE_INCLUDE_PATH | ||
405 | #define TRACE_INCLUDE_PATH . | ||
406 | #include <trace/define_trace.h> | ||
407 | |||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 053752d4b27f..4ef0e9fa3549 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
@@ -33,6 +33,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | |||
33 | BUG_ON(current->journal_info); | 33 | BUG_ON(current->journal_info); |
34 | BUG_ON(blocks == 0 && revokes == 0); | 34 | BUG_ON(blocks == 0 && revokes == 0); |
35 | 35 | ||
36 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) | ||
37 | return -EROFS; | ||
38 | |||
36 | tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); | 39 | tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); |
37 | if (!tr) | 40 | if (!tr) |
38 | return -ENOMEM; | 41 | return -ENOMEM; |
@@ -54,12 +57,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | |||
54 | if (error) | 57 | if (error) |
55 | goto fail_holder_uninit; | 58 | goto fail_holder_uninit; |
56 | 59 | ||
57 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | ||
58 | tr->tr_t_gh.gh_flags |= GL_NOCACHE; | ||
59 | error = -EROFS; | ||
60 | goto fail_gunlock; | ||
61 | } | ||
62 | |||
63 | error = gfs2_log_reserve(sdp, tr->tr_reserved); | 60 | error = gfs2_log_reserve(sdp, tr->tr_reserved); |
64 | if (error) | 61 | if (error) |
65 | goto fail_gunlock; | 62 | goto fail_gunlock; |
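
Moving the SDF_JOURNAL_LIVE test to the top of gfs2_trans_begin() means a dead journal is rejected before any allocation or glock work happens, so the GL_NOCACHE fixup on the unwind path is no longer needed. Reduced to a sketch (gfs2 details elided), the new shape is:

    int example_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks)
    {
            struct gfs2_trans *tr;

            if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
                    return -EROFS;          /* cheap check first, nothing to undo */

            tr = kzalloc(sizeof(*tr), GFP_NOFS);
            if (!tr)
                    return -ENOMEM;
            /* ... glock acquisition and log reservation as in the code above ... */
            return 0;
    }
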
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index a36bb749926d..6f833dc8e910 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -49,11 +49,23 @@ MODULE_LICENSE("GPL"); | |||
49 | */ | 49 | */ |
50 | static void hfs_write_super(struct super_block *sb) | 50 | static void hfs_write_super(struct super_block *sb) |
51 | { | 51 | { |
52 | lock_super(sb); | ||
52 | sb->s_dirt = 0; | 53 | sb->s_dirt = 0; |
53 | if (sb->s_flags & MS_RDONLY) | 54 | |
54 | return; | ||
55 | /* sync everything to the buffers */ | 55 | /* sync everything to the buffers */ |
56 | if (!(sb->s_flags & MS_RDONLY)) | ||
57 | hfs_mdb_commit(sb); | ||
58 | unlock_super(sb); | ||
59 | } | ||
60 | |||
61 | static int hfs_sync_fs(struct super_block *sb, int wait) | ||
62 | { | ||
63 | lock_super(sb); | ||
56 | hfs_mdb_commit(sb); | 64 | hfs_mdb_commit(sb); |
65 | sb->s_dirt = 0; | ||
66 | unlock_super(sb); | ||
67 | |||
68 | return 0; | ||
57 | } | 69 | } |
58 | 70 | ||
59 | /* | 71 | /* |
@@ -65,9 +77,15 @@ static void hfs_write_super(struct super_block *sb) | |||
65 | */ | 77 | */ |
66 | static void hfs_put_super(struct super_block *sb) | 78 | static void hfs_put_super(struct super_block *sb) |
67 | { | 79 | { |
80 | lock_kernel(); | ||
81 | |||
82 | if (sb->s_dirt) | ||
83 | hfs_write_super(sb); | ||
68 | hfs_mdb_close(sb); | 84 | hfs_mdb_close(sb); |
69 | /* release the MDB's resources */ | 85 | /* release the MDB's resources */ |
70 | hfs_mdb_put(sb); | 86 | hfs_mdb_put(sb); |
87 | |||
88 | unlock_kernel(); | ||
71 | } | 89 | } |
72 | 90 | ||
73 | /* | 91 | /* |
@@ -164,6 +182,7 @@ static const struct super_operations hfs_super_operations = { | |||
164 | .clear_inode = hfs_clear_inode, | 182 | .clear_inode = hfs_clear_inode, |
165 | .put_super = hfs_put_super, | 183 | .put_super = hfs_put_super, |
166 | .write_super = hfs_write_super, | 184 | .write_super = hfs_write_super, |
185 | .sync_fs = hfs_sync_fs, | ||
167 | .statfs = hfs_statfs, | 186 | .statfs = hfs_statfs, |
168 | .remount_fs = hfs_remount, | 187 | .remount_fs = hfs_remount, |
169 | .show_options = hfs_show_options, | 188 | .show_options = hfs_show_options, |
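
This is part of a wider pattern in this series, visible again in hfsplus and jffs2 below: the lazy ->write_super path (driven by s_dirt) is split from an explicit ->sync_fs method that sync(2) and unmount call. A generic sketch of the division of labour, with the examplefs_* names as placeholders rather than real kernel helpers:

    static int examplefs_sync_fs(struct super_block *sb, int wait)
    {
            lock_super(sb);
            examplefs_commit_super(sb);     /* hypothetical "write the sb" helper */
            sb->s_dirt = 0;
            unlock_super(sb);
            return 0;
    }

    static void examplefs_write_super(struct super_block *sb)
    {
            /* lazy path: only commit when writable, but always clear s_dirt */
            if (!(sb->s_flags & MS_RDONLY))
                    examplefs_sync_fs(sb, 1);
            else
                    sb->s_dirt = 0;
    }
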
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index f2a64020f42e..9fc3af0c0dab 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -152,15 +152,14 @@ static void hfsplus_clear_inode(struct inode *inode) | |||
152 | } | 152 | } |
153 | } | 153 | } |
154 | 154 | ||
155 | static void hfsplus_write_super(struct super_block *sb) | 155 | static int hfsplus_sync_fs(struct super_block *sb, int wait) |
156 | { | 156 | { |
157 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 157 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; |
158 | 158 | ||
159 | dprint(DBG_SUPER, "hfsplus_write_super\n"); | 159 | dprint(DBG_SUPER, "hfsplus_write_super\n"); |
160 | |||
161 | lock_super(sb); | ||
160 | sb->s_dirt = 0; | 162 | sb->s_dirt = 0; |
161 | if (sb->s_flags & MS_RDONLY) | ||
162 | /* warn? */ | ||
163 | return; | ||
164 | 163 | ||
165 | vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); | 164 | vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); |
166 | vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); | 165 | vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); |
@@ -192,6 +191,16 @@ static void hfsplus_write_super(struct super_block *sb) | |||
192 | } | 191 | } |
193 | HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; | 192 | HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; |
194 | } | 193 | } |
194 | unlock_super(sb); | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | static void hfsplus_write_super(struct super_block *sb) | ||
199 | { | ||
200 | if (!(sb->s_flags & MS_RDONLY)) | ||
201 | hfsplus_sync_fs(sb, 1); | ||
202 | else | ||
203 | sb->s_dirt = 0; | ||
195 | } | 204 | } |
196 | 205 | ||
197 | static void hfsplus_put_super(struct super_block *sb) | 206 | static void hfsplus_put_super(struct super_block *sb) |
@@ -199,6 +208,11 @@ static void hfsplus_put_super(struct super_block *sb) | |||
199 | dprint(DBG_SUPER, "hfsplus_put_super\n"); | 208 | dprint(DBG_SUPER, "hfsplus_put_super\n"); |
200 | if (!sb->s_fs_info) | 209 | if (!sb->s_fs_info) |
201 | return; | 210 | return; |
211 | |||
212 | lock_kernel(); | ||
213 | |||
214 | if (sb->s_dirt) | ||
215 | hfsplus_write_super(sb); | ||
202 | if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { | 216 | if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { |
203 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 217 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; |
204 | 218 | ||
@@ -218,6 +232,8 @@ static void hfsplus_put_super(struct super_block *sb) | |||
218 | unload_nls(HFSPLUS_SB(sb).nls); | 232 | unload_nls(HFSPLUS_SB(sb).nls); |
219 | kfree(sb->s_fs_info); | 233 | kfree(sb->s_fs_info); |
220 | sb->s_fs_info = NULL; | 234 | sb->s_fs_info = NULL; |
235 | |||
236 | unlock_kernel(); | ||
221 | } | 237 | } |
222 | 238 | ||
223 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | 239 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) |
@@ -279,6 +295,7 @@ static const struct super_operations hfsplus_sops = { | |||
279 | .clear_inode = hfsplus_clear_inode, | 295 | .clear_inode = hfsplus_clear_inode, |
280 | .put_super = hfsplus_put_super, | 296 | .put_super = hfsplus_put_super, |
281 | .write_super = hfsplus_write_super, | 297 | .write_super = hfsplus_write_super, |
298 | .sync_fs = hfsplus_sync_fs, | ||
282 | .statfs = hfsplus_statfs, | 299 | .statfs = hfsplus_statfs, |
283 | .remount_fs = hfsplus_remount, | 300 | .remount_fs = hfsplus_remount, |
284 | .show_options = hfsplus_show_options, | 301 | .show_options = hfsplus_show_options, |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fc77965be841..f2feaa06bf26 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/statfs.h> | 13 | #include <linux/statfs.h> |
14 | #include <linux/magic.h> | 14 | #include <linux/magic.h> |
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/smp_lock.h> | ||
16 | 17 | ||
17 | /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ | 18 | /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ |
18 | 19 | ||
@@ -99,11 +100,16 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2, | |||
99 | static void hpfs_put_super(struct super_block *s) | 100 | static void hpfs_put_super(struct super_block *s) |
100 | { | 101 | { |
101 | struct hpfs_sb_info *sbi = hpfs_sb(s); | 102 | struct hpfs_sb_info *sbi = hpfs_sb(s); |
103 | |||
104 | lock_kernel(); | ||
105 | |||
102 | kfree(sbi->sb_cp_table); | 106 | kfree(sbi->sb_cp_table); |
103 | kfree(sbi->sb_bmp_dir); | 107 | kfree(sbi->sb_bmp_dir); |
104 | unmark_dirty(s); | 108 | unmark_dirty(s); |
105 | s->s_fs_info = NULL; | 109 | s->s_fs_info = NULL; |
106 | kfree(sbi); | 110 | kfree(sbi); |
111 | |||
112 | unlock_kernel(); | ||
107 | } | 113 | } |
108 | 114 | ||
109 | unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) | 115 | unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) |
@@ -393,6 +399,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) | |||
393 | 399 | ||
394 | *flags |= MS_NOATIME; | 400 | *flags |= MS_NOATIME; |
395 | 401 | ||
402 | lock_kernel(); | ||
403 | lock_super(s); | ||
396 | uid = sbi->sb_uid; gid = sbi->sb_gid; | 404 | uid = sbi->sb_uid; gid = sbi->sb_gid; |
397 | umask = 0777 & ~sbi->sb_mode; | 405 | umask = 0777 & ~sbi->sb_mode; |
398 | lowercase = sbi->sb_lowercase; conv = sbi->sb_conv; | 406 | lowercase = sbi->sb_lowercase; conv = sbi->sb_conv; |
@@ -425,9 +433,13 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) | |||
425 | 433 | ||
426 | replace_mount_options(s, new_opts); | 434 | replace_mount_options(s, new_opts); |
427 | 435 | ||
436 | unlock_super(s); | ||
437 | unlock_kernel(); | ||
428 | return 0; | 438 | return 0; |
429 | 439 | ||
430 | out_err: | 440 | out_err: |
441 | unlock_super(s); | ||
442 | unlock_kernel(); | ||
431 | kfree(new_opts); | 443 | kfree(new_opts); |
432 | return -EINVAL; | 444 | return -EINVAL; |
433 | } | 445 | } |
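
The lock_kernel()/unlock_kernel() pairs appearing in ->put_super and ->remount_fs across these filesystems come from pushing the BKL out of the VFS and into the methods that still depend on it; once the lock lives inside the filesystem, every exit path has to drop it itself, which is what the hpfs out_err changes above are about. The shape of the pattern, as a sketch with a hypothetical helper:

    static int examplefs_remount(struct super_block *s, int *flags, char *data)
    {
            int err;

            lock_kernel();
            err = examplefs_apply_options(s, data); /* assumed option parser */
            unlock_kernel();        /* dropped on success and failure alike */
            return err;
    }
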
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c1462d43e721..941c8425c10b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/dnotify.h> | 30 | #include <linux/dnotify.h> |
31 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/ima.h> | ||
33 | 34 | ||
34 | #include <asm/uaccess.h> | 35 | #include <asm/uaccess.h> |
35 | 36 | ||
@@ -986,6 +987,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) | |||
986 | &hugetlbfs_file_operations); | 987 | &hugetlbfs_file_operations); |
987 | if (!file) | 988 | if (!file) |
988 | goto out_dentry; /* inode is already attached */ | 989 | goto out_dentry; /* inode is already attached */ |
990 | ima_counts_get(file); | ||
989 | 991 | ||
990 | return file; | 992 | return file; |
991 | 993 | ||
diff --git a/fs/inode.c b/fs/inode.c index bca0c618fdb3..a88baebf77cf 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/cdev.h> | 22 | #include <linux/cdev.h> |
23 | #include <linux/bootmem.h> | 23 | #include <linux/bootmem.h> |
24 | #include <linux/inotify.h> | 24 | #include <linux/inotify.h> |
25 | #include <linux/fsnotify.h> | ||
25 | #include <linux/mount.h> | 26 | #include <linux/mount.h> |
26 | #include <linux/async.h> | 27 | #include <linux/async.h> |
27 | 28 | ||
@@ -189,6 +190,10 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) | |||
189 | inode->i_private = NULL; | 190 | inode->i_private = NULL; |
190 | inode->i_mapping = mapping; | 191 | inode->i_mapping = mapping; |
191 | 192 | ||
193 | #ifdef CONFIG_FSNOTIFY | ||
194 | inode->i_fsnotify_mask = 0; | ||
195 | #endif | ||
196 | |||
192 | return inode; | 197 | return inode; |
193 | 198 | ||
194 | out_free_security: | 199 | out_free_security: |
@@ -221,6 +226,7 @@ void destroy_inode(struct inode *inode) | |||
221 | BUG_ON(inode_has_buffers(inode)); | 226 | BUG_ON(inode_has_buffers(inode)); |
222 | ima_inode_free(inode); | 227 | ima_inode_free(inode); |
223 | security_inode_free(inode); | 228 | security_inode_free(inode); |
229 | fsnotify_inode_delete(inode); | ||
224 | if (inode->i_sb->s_op->destroy_inode) | 230 | if (inode->i_sb->s_op->destroy_inode) |
225 | inode->i_sb->s_op->destroy_inode(inode); | 231 | inode->i_sb->s_op->destroy_inode(inode); |
226 | else | 232 | else |
@@ -252,6 +258,9 @@ void inode_init_once(struct inode *inode) | |||
252 | INIT_LIST_HEAD(&inode->inotify_watches); | 258 | INIT_LIST_HEAD(&inode->inotify_watches); |
253 | mutex_init(&inode->inotify_mutex); | 259 | mutex_init(&inode->inotify_mutex); |
254 | #endif | 260 | #endif |
261 | #ifdef CONFIG_FSNOTIFY | ||
262 | INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); | ||
263 | #endif | ||
255 | } | 264 | } |
256 | EXPORT_SYMBOL(inode_init_once); | 265 | EXPORT_SYMBOL(inode_init_once); |
257 | 266 | ||
@@ -398,6 +407,7 @@ int invalidate_inodes(struct super_block *sb) | |||
398 | mutex_lock(&iprune_mutex); | 407 | mutex_lock(&iprune_mutex); |
399 | spin_lock(&inode_lock); | 408 | spin_lock(&inode_lock); |
400 | inotify_unmount_inodes(&sb->s_inodes); | 409 | inotify_unmount_inodes(&sb->s_inodes); |
410 | fsnotify_unmount_inodes(&sb->s_inodes); | ||
401 | busy = invalidate_list(&sb->s_inodes, &throw_away); | 411 | busy = invalidate_list(&sb->s_inodes, &throw_away); |
402 | spin_unlock(&inode_lock); | 412 | spin_unlock(&inode_lock); |
403 | 413 | ||
@@ -1412,7 +1422,7 @@ void file_update_time(struct file *file) | |||
1412 | if (IS_NOCMTIME(inode)) | 1422 | if (IS_NOCMTIME(inode)) |
1413 | return; | 1423 | return; |
1414 | 1424 | ||
1415 | err = mnt_want_write(file->f_path.mnt); | 1425 | err = mnt_want_write_file(file); |
1416 | if (err) | 1426 | if (err) |
1417 | return; | 1427 | return; |
1418 | 1428 | ||
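
file_update_time() now takes its write reference through mnt_want_write_file(), which can piggy-back on the write access the file already acquired at open time rather than doing a full mnt_want_write() (the exact fast-path details are an assumption here, inferred from the helper's name and its callers). The caller pattern stays symmetric:

    void example_update_time(struct file *file)
    {
            if (mnt_want_write_file(file))  /* may reuse the open-for-write ref */
                    return;
            /* ... update mtime/ctime and mark the inode dirty ... */
            mnt_drop_write(file->f_path.mnt);
    }
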
diff --git a/fs/internal.h b/fs/internal.h index b4dac4fb6b61..d55ef562f0bb 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) | |||
25 | return sb == blockdev_superblock; | 25 | return sb == blockdev_superblock; |
26 | } | 26 | } |
27 | 27 | ||
28 | extern int __sync_blockdev(struct block_device *bdev, int wait); | ||
29 | |||
28 | #else | 30 | #else |
29 | static inline void bdev_cache_init(void) | 31 | static inline void bdev_cache_init(void) |
30 | { | 32 | { |
@@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) | |||
34 | { | 36 | { |
35 | return 0; | 37 | return 0; |
36 | } | 38 | } |
39 | |||
40 | static inline int __sync_blockdev(struct block_device *bdev, int wait) | ||
41 | { | ||
42 | return 0; | ||
43 | } | ||
37 | #endif | 44 | #endif |
38 | 45 | ||
39 | /* | 46 | /* |
@@ -66,3 +73,13 @@ extern void __init mnt_init(void); | |||
66 | * fs_struct.c | 73 | * fs_struct.c |
67 | */ | 74 | */ |
68 | extern void chroot_fs_refs(struct path *, struct path *); | 75 | extern void chroot_fs_refs(struct path *, struct path *); |
76 | |||
77 | /* | ||
78 | * file_table.c | ||
79 | */ | ||
80 | extern void mark_files_ro(struct super_block *); | ||
81 | |||
82 | /* | ||
83 | * super.c | ||
84 | */ | ||
85 | extern int do_remount_sb(struct super_block *, int, void *, int); | ||
diff --git a/fs/ioctl.c b/fs/ioctl.c index 82d9c42b8bac..286f38dfc6c0 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd, | |||
414 | switch (cmd) { | 414 | switch (cmd) { |
415 | case FIBMAP: | 415 | case FIBMAP: |
416 | return ioctl_fibmap(filp, p); | 416 | return ioctl_fibmap(filp, p); |
417 | case FS_IOC_FIEMAP: | ||
418 | return ioctl_fiemap(filp, arg); | ||
419 | case FIGETBSZ: | ||
420 | return put_user(inode->i_sb->s_blocksize, p); | ||
421 | case FIONREAD: | 417 | case FIONREAD: |
422 | return put_user(i_size_read(inode) - filp->f_pos, p); | 418 | return put_user(i_size_read(inode) - filp->f_pos, p); |
423 | } | 419 | } |
@@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | |||
557 | error = ioctl_fsthaw(filp); | 553 | error = ioctl_fsthaw(filp); |
558 | break; | 554 | break; |
559 | 555 | ||
556 | case FS_IOC_FIEMAP: | ||
557 | return ioctl_fiemap(filp, arg); | ||
558 | |||
559 | case FIGETBSZ: | ||
560 | { | ||
561 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
562 | int __user *p = (int __user *)arg; | ||
563 | return put_user(inode->i_sb->s_blocksize, p); | ||
564 | } | ||
565 | |||
560 | default: | 566 | default: |
561 | if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) | 567 | if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) |
562 | error = file_ioctl(filp, cmd, arg); | 568 | error = file_ioctl(filp, cmd, arg); |
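
With FS_IOC_FIEMAP and FIGETBSZ handled in do_vfs_ioctl() rather than file_ioctl(), they now work on any file type, since file_ioctl() is only reached for regular files (see the S_ISREG check in the default case above). A small userspace check of the FIGETBSZ behaviour on the patched kernel:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>           /* FIGETBSZ */

    int main(int argc, char **argv)
    {
            const char *path = argc > 1 ? argv[1] : ".";
            int bsz, fd = open(path, O_RDONLY);   /* a directory now works too */

            if (fd < 0 || ioctl(fd, FIGETBSZ, &bsz) < 0) {
                    perror("FIGETBSZ");
                    return 1;
            }
            printf("%s: filesystem block size %d\n", path, bsz);
            close(fd);
            return 0;
    }
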
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index b4cbe9603c7d..068b34b5a107 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -42,11 +42,16 @@ static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qst | |||
42 | static void isofs_put_super(struct super_block *sb) | 42 | static void isofs_put_super(struct super_block *sb) |
43 | { | 43 | { |
44 | struct isofs_sb_info *sbi = ISOFS_SB(sb); | 44 | struct isofs_sb_info *sbi = ISOFS_SB(sb); |
45 | |||
45 | #ifdef CONFIG_JOLIET | 46 | #ifdef CONFIG_JOLIET |
47 | lock_kernel(); | ||
48 | |||
46 | if (sbi->s_nls_iocharset) { | 49 | if (sbi->s_nls_iocharset) { |
47 | unload_nls(sbi->s_nls_iocharset); | 50 | unload_nls(sbi->s_nls_iocharset); |
48 | sbi->s_nls_iocharset = NULL; | 51 | sbi->s_nls_iocharset = NULL; |
49 | } | 52 | } |
53 | |||
54 | unlock_kernel(); | ||
50 | #endif | 55 | #endif |
51 | 56 | ||
52 | kfree(sbi); | 57 | kfree(sbi); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 58144102bf25..62be7d294ec2 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1781 | * Journal abort has very specific semantics, which we describe | 1781 | * Journal abort has very specific semantics, which we describe |
1782 | * for journal abort. | 1782 | * for journal abort. |
1783 | * | 1783 | * |
1784 | * Two internal function, which provide abort to te jbd layer | 1784 | * Two internal functions, which provide abort to the jbd layer |
1785 | * itself are here. | 1785 | * itself are here. |
1786 | */ | 1786 | */ |
1787 | 1787 | ||
@@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) | |||
1879 | * int jbd2_journal_errno () - returns the journal's error state. | 1879 | * int jbd2_journal_errno () - returns the journal's error state. |
1880 | * @journal: journal to examine. | 1880 | * @journal: journal to examine. |
1881 | * | 1881 | * |
1882 | * This is the errno numbet set with jbd2_journal_abort(), the last | 1882 | * This is the errno number set with jbd2_journal_abort(), the last |
1883 | * time the journal was mounted - if the journal was stopped | 1883 | * time the journal was mounted - if the journal was stopped |
1884 | * without calling abort this will be 0. | 1884 | * without calling abort this will be 0. |
1885 | * | 1885 | * |
@@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal) | |||
1903 | * int jbd2_journal_clear_err () - clears the journal's error state | 1903 | * int jbd2_journal_clear_err () - clears the journal's error state |
1904 | * @journal: journal to act on. | 1904 | * @journal: journal to act on. |
1905 | * | 1905 | * |
1906 | * An error must be cleared or Acked to take a FS out of readonly | 1906 | * An error must be cleared or acked to take a FS out of readonly |
1907 | * mode. | 1907 | * mode. |
1908 | */ | 1908 | */ |
1909 | int jbd2_journal_clear_err(journal_t *journal) | 1909 | int jbd2_journal_clear_err(journal_t *journal) |
@@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal) | |||
1923 | * void jbd2_journal_ack_err() - Ack journal err. | 1923 | * void jbd2_journal_ack_err() - Ack journal err. |
1924 | * @journal: journal to act on. | 1924 | * @journal: journal to act on. |
1925 | * | 1925 | * |
1926 | * An error must be cleared or Acked to take a FS out of readonly | 1926 | * An error must be cleared or acked to take a FS out of readonly |
1927 | * mode. | 1927 | * mode. |
1928 | */ | 1928 | */ |
1929 | void jbd2_journal_ack_err(journal_t *journal) | 1929 | void jbd2_journal_ack_err(journal_t *journal) |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 249305d65d5b..3451a81b2142 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/vmalloc.h> | 20 | #include <linux/vmalloc.h> |
21 | #include <linux/vfs.h> | 21 | #include <linux/vfs.h> |
22 | #include <linux/crc32.h> | 22 | #include <linux/crc32.h> |
23 | #include <linux/smp_lock.h> | ||
23 | #include "nodelist.h" | 24 | #include "nodelist.h" |
24 | 25 | ||
25 | static int jffs2_flash_setup(struct jffs2_sb_info *c); | 26 | static int jffs2_flash_setup(struct jffs2_sb_info *c); |
@@ -387,6 +388,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) | |||
387 | This also catches the case where it was stopped and this | 388 | This also catches the case where it was stopped and this |
388 | is just a remount to restart it. | 389 | is just a remount to restart it. |
389 | Flush the writebuffer, if necessary, else we lose it */ | 390 | Flush the writebuffer, if necessary, else we lose it */ |
391 | lock_kernel(); | ||
390 | if (!(sb->s_flags & MS_RDONLY)) { | 392 | if (!(sb->s_flags & MS_RDONLY)) { |
391 | jffs2_stop_garbage_collect_thread(c); | 393 | jffs2_stop_garbage_collect_thread(c); |
392 | mutex_lock(&c->alloc_sem); | 394 | mutex_lock(&c->alloc_sem); |
@@ -399,24 +401,10 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) | |||
399 | 401 | ||
400 | *flags |= MS_NOATIME; | 402 | *flags |= MS_NOATIME; |
401 | 403 | ||
404 | unlock_kernel(); | ||
402 | return 0; | 405 | return 0; |
403 | } | 406 | } |
404 | 407 | ||
405 | void jffs2_write_super (struct super_block *sb) | ||
406 | { | ||
407 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | ||
408 | sb->s_dirt = 0; | ||
409 | |||
410 | if (sb->s_flags & MS_RDONLY) | ||
411 | return; | ||
412 | |||
413 | D1(printk(KERN_DEBUG "jffs2_write_super()\n")); | ||
414 | jffs2_garbage_collect_trigger(c); | ||
415 | jffs2_erase_pending_blocks(c, 0); | ||
416 | jffs2_flush_wbuf_gc(c, 0); | ||
417 | } | ||
418 | |||
419 | |||
420 | /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, | 408 | /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, |
421 | fill in the raw_inode while you're at it. */ | 409 | fill in the raw_inode while you're at it. */ |
422 | struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri) | 410 | struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri) |
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 5e194a5c8e29..2228380c47b9 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h | |||
@@ -181,7 +181,6 @@ void jffs2_dirty_inode(struct inode *inode); | |||
181 | struct inode *jffs2_new_inode (struct inode *dir_i, int mode, | 181 | struct inode *jffs2_new_inode (struct inode *dir_i, int mode, |
182 | struct jffs2_raw_inode *ri); | 182 | struct jffs2_raw_inode *ri); |
183 | int jffs2_statfs (struct dentry *, struct kstatfs *); | 183 | int jffs2_statfs (struct dentry *, struct kstatfs *); |
184 | void jffs2_write_super (struct super_block *); | ||
185 | int jffs2_remount_fs (struct super_block *, int *, char *); | 184 | int jffs2_remount_fs (struct super_block *, int *, char *); |
186 | int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); | 185 | int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); |
187 | void jffs2_gc_release_inode(struct jffs2_sb_info *c, | 186 | void jffs2_gc_release_inode(struct jffs2_sb_info *c, |
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 4c4e18c54a51..07a22caf2687 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -53,10 +53,29 @@ static void jffs2_i_init_once(void *foo) | |||
53 | inode_init_once(&f->vfs_inode); | 53 | inode_init_once(&f->vfs_inode); |
54 | } | 54 | } |
55 | 55 | ||
56 | static void jffs2_write_super(struct super_block *sb) | ||
57 | { | ||
58 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | ||
59 | |||
60 | lock_super(sb); | ||
61 | sb->s_dirt = 0; | ||
62 | |||
63 | if (!(sb->s_flags & MS_RDONLY)) { | ||
64 | D1(printk(KERN_DEBUG "jffs2_write_super()\n")); | ||
65 | jffs2_garbage_collect_trigger(c); | ||
66 | jffs2_erase_pending_blocks(c, 0); | ||
67 | jffs2_flush_wbuf_gc(c, 0); | ||
68 | } | ||
69 | |||
70 | unlock_super(sb); | ||
71 | } | ||
72 | |||
56 | static int jffs2_sync_fs(struct super_block *sb, int wait) | 73 | static int jffs2_sync_fs(struct super_block *sb, int wait) |
57 | { | 74 | { |
58 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | 75 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); |
59 | 76 | ||
77 | jffs2_write_super(sb); | ||
78 | |||
60 | mutex_lock(&c->alloc_sem); | 79 | mutex_lock(&c->alloc_sem); |
61 | jffs2_flush_wbuf_pad(c); | 80 | jffs2_flush_wbuf_pad(c); |
62 | mutex_unlock(&c->alloc_sem); | 81 | mutex_unlock(&c->alloc_sem); |
@@ -174,6 +193,11 @@ static void jffs2_put_super (struct super_block *sb) | |||
174 | 193 | ||
175 | D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); | 194 | D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); |
176 | 195 | ||
196 | lock_kernel(); | ||
197 | |||
198 | if (sb->s_dirt) | ||
199 | jffs2_write_super(sb); | ||
200 | |||
177 | mutex_lock(&c->alloc_sem); | 201 | mutex_lock(&c->alloc_sem); |
178 | jffs2_flush_wbuf_pad(c); | 202 | jffs2_flush_wbuf_pad(c); |
179 | mutex_unlock(&c->alloc_sem); | 203 | mutex_unlock(&c->alloc_sem); |
@@ -192,6 +216,8 @@ static void jffs2_put_super (struct super_block *sb) | |||
192 | if (c->mtd->sync) | 216 | if (c->mtd->sync) |
193 | c->mtd->sync(c->mtd); | 217 | c->mtd->sync(c->mtd); |
194 | 218 | ||
219 | unlock_kernel(); | ||
220 | |||
195 | D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); | 221 | D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); |
196 | } | 222 | } |
197 | 223 | ||
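
Read as one piece, the resulting jffs2 ->sync_fs kicks the same garbage-collect, erase and write-buffer work as the lazy path before padding out the write buffer; this consolidates the fragments shown in the hunks above and is a sketch, not new code:

    static int example_jffs2_sync_fs(struct super_block *sb, int wait)
    {
            struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);

            jffs2_write_super(sb);          /* GC trigger, pending erases, wbuf */
            mutex_lock(&c->alloc_sem);
            jffs2_flush_wbuf_pad(c);        /* pad and push the write buffer */
            mutex_unlock(&c->alloc_sem);
            return 0;
    }
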
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 346057218edc..0fc30407f039 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -2571,6 +2571,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2571 | 2571 | ||
2572 | txAbort(tid, 0); | 2572 | txAbort(tid, 0); |
2573 | txEnd(tid); | 2573 | txEnd(tid); |
2574 | mutex_unlock(&JFS_IP(ipimap)->commit_mutex); | ||
2574 | 2575 | ||
2575 | /* release the inode map lock */ | 2576 | /* release the inode map lock */ |
2576 | IWRITE_UNLOCK(ipimap); | 2577 | IWRITE_UNLOCK(ipimap); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 6f21adf9479a..09b1b6ee2186 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/crc32.h> | 32 | #include <linux/crc32.h> |
33 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
34 | #include <linux/seq_file.h> | 34 | #include <linux/seq_file.h> |
35 | #include <linux/smp_lock.h> | ||
35 | 36 | ||
36 | #include "jfs_incore.h" | 37 | #include "jfs_incore.h" |
37 | #include "jfs_filsys.h" | 38 | #include "jfs_filsys.h" |
@@ -183,6 +184,9 @@ static void jfs_put_super(struct super_block *sb) | |||
183 | int rc; | 184 | int rc; |
184 | 185 | ||
185 | jfs_info("In jfs_put_super"); | 186 | jfs_info("In jfs_put_super"); |
187 | |||
188 | lock_kernel(); | ||
189 | |||
186 | rc = jfs_umount(sb); | 190 | rc = jfs_umount(sb); |
187 | if (rc) | 191 | if (rc) |
188 | jfs_err("jfs_umount failed with return code %d", rc); | 192 | jfs_err("jfs_umount failed with return code %d", rc); |
@@ -195,6 +199,8 @@ static void jfs_put_super(struct super_block *sb) | |||
195 | sbi->direct_inode = NULL; | 199 | sbi->direct_inode = NULL; |
196 | 200 | ||
197 | kfree(sbi); | 201 | kfree(sbi); |
202 | |||
203 | unlock_kernel(); | ||
198 | } | 204 | } |
199 | 205 | ||
200 | enum { | 206 | enum { |
@@ -370,19 +376,24 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) | |||
370 | s64 newLVSize = 0; | 376 | s64 newLVSize = 0; |
371 | int rc = 0; | 377 | int rc = 0; |
372 | int flag = JFS_SBI(sb)->flag; | 378 | int flag = JFS_SBI(sb)->flag; |
379 | int ret; | ||
373 | 380 | ||
374 | if (!parse_options(data, sb, &newLVSize, &flag)) { | 381 | if (!parse_options(data, sb, &newLVSize, &flag)) { |
375 | return -EINVAL; | 382 | return -EINVAL; |
376 | } | 383 | } |
384 | lock_kernel(); | ||
377 | if (newLVSize) { | 385 | if (newLVSize) { |
378 | if (sb->s_flags & MS_RDONLY) { | 386 | if (sb->s_flags & MS_RDONLY) { |
379 | printk(KERN_ERR | 387 | printk(KERN_ERR |
380 | "JFS: resize requires volume to be mounted read-write\n"); | 388 | "JFS: resize requires volume to be mounted read-write\n"); |
389 | unlock_kernel(); | ||
381 | return -EROFS; | 390 | return -EROFS; |
382 | } | 391 | } |
383 | rc = jfs_extendfs(sb, newLVSize, 0); | 392 | rc = jfs_extendfs(sb, newLVSize, 0); |
384 | if (rc) | 393 | if (rc) { |
394 | unlock_kernel(); | ||
385 | return rc; | 395 | return rc; |
396 | } | ||
386 | } | 397 | } |
387 | 398 | ||
388 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { | 399 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { |
@@ -393,23 +404,31 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) | |||
393 | truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); | 404 | truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); |
394 | 405 | ||
395 | JFS_SBI(sb)->flag = flag; | 406 | JFS_SBI(sb)->flag = flag; |
396 | return jfs_mount_rw(sb, 1); | 407 | ret = jfs_mount_rw(sb, 1); |
408 | unlock_kernel(); | ||
409 | return ret; | ||
397 | } | 410 | } |
398 | if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { | 411 | if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { |
399 | rc = jfs_umount_rw(sb); | 412 | rc = jfs_umount_rw(sb); |
400 | JFS_SBI(sb)->flag = flag; | 413 | JFS_SBI(sb)->flag = flag; |
414 | unlock_kernel(); | ||
401 | return rc; | 415 | return rc; |
402 | } | 416 | } |
403 | if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) | 417 | if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) |
404 | if (!(sb->s_flags & MS_RDONLY)) { | 418 | if (!(sb->s_flags & MS_RDONLY)) { |
405 | rc = jfs_umount_rw(sb); | 419 | rc = jfs_umount_rw(sb); |
406 | if (rc) | 420 | if (rc) { |
421 | unlock_kernel(); | ||
407 | return rc; | 422 | return rc; |
423 | } | ||
408 | JFS_SBI(sb)->flag = flag; | 424 | JFS_SBI(sb)->flag = flag; |
409 | return jfs_mount_rw(sb, 1); | 425 | ret = jfs_mount_rw(sb, 1); |
426 | unlock_kernel(); | ||
427 | return ret; | ||
410 | } | 428 | } |
411 | JFS_SBI(sb)->flag = flag; | 429 | JFS_SBI(sb)->flag = flag; |
412 | 430 | ||
431 | unlock_kernel(); | ||
413 | return 0; | 432 | return 0; |
414 | } | 433 | } |
415 | 434 | ||
@@ -720,8 +739,10 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type, | |||
720 | blk++; | 739 | blk++; |
721 | } | 740 | } |
722 | out: | 741 | out: |
723 | if (len == towrite) | 742 | if (len == towrite) { |
743 | mutex_unlock(&inode->i_mutex); | ||
724 | return err; | 744 | return err; |
745 | } | ||
725 | if (inode->i_size < off+len-towrite) | 746 | if (inode->i_size < off+len-towrite) |
726 | i_size_write(inode, off+len-towrite); | 747 | i_size_write(inode, off+len-towrite); |
727 | inode->i_version++; | 748 | inode->i_version++; |
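
Two of the jfs fixes above share a theme: jfs_remount() now wraps its body in lock_kernel() and must unlock on every early return, and jfs_quota_write() previously leaked i_mutex on the nothing-written path (len == towrite). A goto-based unwind, sketched below with hypothetical step helpers, is the usual way to keep such exits balanced without scattering unlock calls:

    static int example_locked_op(struct super_block *sb)
    {
            int err;

            lock_kernel();
            err = example_step_one(sb);     /* assumed helpers */
            if (err)
                    goto out;
            err = example_step_two(sb);
    out:
            unlock_kernel();                /* single exit, always dropped */
            return err;
    }
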
diff --git a/fs/libfs.c b/fs/libfs.c index 80046ddf5063..ddfa89948c3f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -9,6 +9,8 @@ | |||
9 | #include <linux/vfs.h> | 9 | #include <linux/vfs.h> |
10 | #include <linux/mutex.h> | 10 | #include <linux/mutex.h> |
11 | #include <linux/exportfs.h> | 11 | #include <linux/exportfs.h> |
12 | #include <linux/writeback.h> | ||
13 | #include <linux/buffer_head.h> | ||
12 | 14 | ||
13 | #include <asm/uaccess.h> | 15 | #include <asm/uaccess.h> |
14 | 16 | ||
@@ -807,6 +809,29 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
807 | } | 809 | } |
808 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); | 810 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); |
809 | 811 | ||
812 | int simple_fsync(struct file *file, struct dentry *dentry, int datasync) | ||
813 | { | ||
814 | struct writeback_control wbc = { | ||
815 | .sync_mode = WB_SYNC_ALL, | ||
816 | .nr_to_write = 0, /* metadata-only; caller takes care of data */ | ||
817 | }; | ||
818 | struct inode *inode = dentry->d_inode; | ||
819 | int err; | ||
820 | int ret; | ||
821 | |||
822 | ret = sync_mapping_buffers(inode->i_mapping); | ||
823 | if (!(inode->i_state & I_DIRTY)) | ||
824 | return ret; | ||
825 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
826 | return ret; | ||
827 | |||
828 | err = sync_inode(inode, &wbc); | ||
829 | if (ret == 0) | ||
830 | ret = err; | ||
831 | return ret; | ||
832 | } | ||
833 | EXPORT_SYMBOL(simple_fsync); | ||
834 | |||
810 | EXPORT_SYMBOL(dcache_dir_close); | 835 | EXPORT_SYMBOL(dcache_dir_close); |
811 | EXPORT_SYMBOL(dcache_dir_lseek); | 836 | EXPORT_SYMBOL(dcache_dir_lseek); |
812 | EXPORT_SYMBOL(dcache_dir_open); | 837 | EXPORT_SYMBOL(dcache_dir_open); |
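
simple_fsync() gives buffer-cache-based filesystems a stock ->fsync: sync the inode's mapping buffers, then write the inode itself if it is dirty (for datasync, the inode write is skipped unless datasync-relevant metadata changed). The minix conversion just below is the first user; wiring it up is a one-line entry in the file_operations table, as in this sketch (examplefs is a placeholder):

    const struct file_operations examplefs_file_operations = {
            .llseek  = generic_file_llseek,
            .read    = do_sync_read,
            .write   = do_sync_write,
            .mmap    = generic_file_mmap,
            .fsync   = simple_fsync,        /* the new library helper */
    };
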
diff --git a/fs/minix/dir.c b/fs/minix/dir.c index d4946c4c90e2..e5f206467e40 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c | |||
@@ -22,7 +22,7 @@ static int minix_readdir(struct file *, void *, filldir_t); | |||
22 | const struct file_operations minix_dir_operations = { | 22 | const struct file_operations minix_dir_operations = { |
23 | .read = generic_read_dir, | 23 | .read = generic_read_dir, |
24 | .readdir = minix_readdir, | 24 | .readdir = minix_readdir, |
25 | .fsync = minix_sync_file, | 25 | .fsync = simple_fsync, |
26 | }; | 26 | }; |
27 | 27 | ||
28 | static inline void dir_put_page(struct page *page) | 28 | static inline void dir_put_page(struct page *page) |
diff --git a/fs/minix/file.c b/fs/minix/file.c index 17765f697e50..3eec3e607a87 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c | |||
@@ -6,15 +6,12 @@ | |||
6 | * minix regular file handling primitives | 6 | * minix regular file handling primitives |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/buffer_head.h> /* for fsync_inode_buffers() */ | ||
10 | #include "minix.h" | 9 | #include "minix.h" |
11 | 10 | ||
12 | /* | 11 | /* |
13 | * We have mostly NULLs here: the current defaults are OK for | 12 | * We have mostly NULLs here: the current defaults are OK for |
14 | * the minix filesystem. | 13 | * the minix filesystem. |
15 | */ | 14 | */ |
16 | int minix_sync_file(struct file *, struct dentry *, int); | ||
17 | |||
18 | const struct file_operations minix_file_operations = { | 15 | const struct file_operations minix_file_operations = { |
19 | .llseek = generic_file_llseek, | 16 | .llseek = generic_file_llseek, |
20 | .read = do_sync_read, | 17 | .read = do_sync_read, |
@@ -22,7 +19,7 @@ const struct file_operations minix_file_operations = { | |||
22 | .write = do_sync_write, | 19 | .write = do_sync_write, |
23 | .aio_write = generic_file_aio_write, | 20 | .aio_write = generic_file_aio_write, |
24 | .mmap = generic_file_mmap, | 21 | .mmap = generic_file_mmap, |
25 | .fsync = minix_sync_file, | 22 | .fsync = simple_fsync, |
26 | .splice_read = generic_file_splice_read, | 23 | .splice_read = generic_file_splice_read, |
27 | }; | 24 | }; |
28 | 25 | ||
@@ -30,18 +27,3 @@ const struct inode_operations minix_file_inode_operations = { | |||
30 | .truncate = minix_truncate, | 27 | .truncate = minix_truncate, |
31 | .getattr = minix_getattr, | 28 | .getattr = minix_getattr, |
32 | }; | 29 | }; |
33 | |||
34 | int minix_sync_file(struct file * file, struct dentry *dentry, int datasync) | ||
35 | { | ||
36 | struct inode *inode = dentry->d_inode; | ||
37 | int err; | ||
38 | |||
39 | err = sync_mapping_buffers(inode->i_mapping); | ||
40 | if (!(inode->i_state & I_DIRTY)) | ||
41 | return err; | ||
42 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
43 | return err; | ||
44 | |||
45 | err |= minix_sync_inode(inode); | ||
46 | return err ? -EIO : 0; | ||
47 | } | ||
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index daad3c2740db..f91a23693597 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -35,6 +35,8 @@ static void minix_put_super(struct super_block *sb) | |||
35 | int i; | 35 | int i; |
36 | struct minix_sb_info *sbi = minix_sb(sb); | 36 | struct minix_sb_info *sbi = minix_sb(sb); |
37 | 37 | ||
38 | lock_kernel(); | ||
39 | |||
38 | if (!(sb->s_flags & MS_RDONLY)) { | 40 | if (!(sb->s_flags & MS_RDONLY)) { |
39 | if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ | 41 | if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ |
40 | sbi->s_ms->s_state = sbi->s_mount_state; | 42 | sbi->s_ms->s_state = sbi->s_mount_state; |
@@ -49,7 +51,7 @@ static void minix_put_super(struct super_block *sb) | |||
49 | sb->s_fs_info = NULL; | 51 | sb->s_fs_info = NULL; |
50 | kfree(sbi); | 52 | kfree(sbi); |
51 | 53 | ||
52 | return; | 54 | unlock_kernel(); |
53 | } | 55 | } |
54 | 56 | ||
55 | static struct kmem_cache * minix_inode_cachep; | 57 | static struct kmem_cache * minix_inode_cachep; |
@@ -554,38 +556,25 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) | |||
554 | return bh; | 556 | return bh; |
555 | } | 557 | } |
556 | 558 | ||
557 | static struct buffer_head *minix_update_inode(struct inode *inode) | 559 | static int minix_write_inode(struct inode *inode, int wait) |
558 | { | ||
559 | if (INODE_VERSION(inode) == MINIX_V1) | ||
560 | return V1_minix_update_inode(inode); | ||
561 | else | ||
562 | return V2_minix_update_inode(inode); | ||
563 | } | ||
564 | |||
565 | static int minix_write_inode(struct inode * inode, int wait) | ||
566 | { | ||
567 | brelse(minix_update_inode(inode)); | ||
568 | return 0; | ||
569 | } | ||
570 | |||
571 | int minix_sync_inode(struct inode * inode) | ||
572 | { | 560 | { |
573 | int err = 0; | 561 | int err = 0; |
574 | struct buffer_head *bh; | 562 | struct buffer_head *bh; |
575 | 563 | ||
576 | bh = minix_update_inode(inode); | 564 | if (INODE_VERSION(inode) == MINIX_V1) |
577 | if (bh && buffer_dirty(bh)) | 565 | bh = V1_minix_update_inode(inode); |
578 | { | 566 | else |
567 | bh = V2_minix_update_inode(inode); | ||
568 | if (!bh) | ||
569 | return -EIO; | ||
570 | if (wait && buffer_dirty(bh)) { | ||
579 | sync_dirty_buffer(bh); | 571 | sync_dirty_buffer(bh); |
580 | if (buffer_req(bh) && !buffer_uptodate(bh)) | 572 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
581 | { | ||
582 | printk("IO error syncing minix inode [%s:%08lx]\n", | 573 | printk("IO error syncing minix inode [%s:%08lx]\n", |
583 | inode->i_sb->s_id, inode->i_ino); | 574 | inode->i_sb->s_id, inode->i_ino); |
584 | err = -1; | 575 | err = -EIO; |
585 | } | 576 | } |
586 | } | 577 | } |
587 | else if (!bh) | ||
588 | err = -1; | ||
589 | brelse (bh); | 578 | brelse (bh); |
590 | return err; | 579 | return err; |
591 | } | 580 | } |
diff --git a/fs/minix/minix.h b/fs/minix/minix.h index e6a0b193bea4..cb7fdd11f9a5 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h | |||
@@ -57,7 +57,6 @@ extern int __minix_write_begin(struct file *file, struct address_space *mapping, | |||
57 | extern void V1_minix_truncate(struct inode *); | 57 | extern void V1_minix_truncate(struct inode *); |
58 | extern void V2_minix_truncate(struct inode *); | 58 | extern void V2_minix_truncate(struct inode *); |
59 | extern void minix_truncate(struct inode *); | 59 | extern void minix_truncate(struct inode *); |
60 | extern int minix_sync_inode(struct inode *); | ||
61 | extern void minix_set_inode(struct inode *, dev_t); | 60 | extern void minix_set_inode(struct inode *, dev_t); |
62 | extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int); | 61 | extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int); |
63 | extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int); | 62 | extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int); |
@@ -72,7 +71,6 @@ extern int minix_empty_dir(struct inode*); | |||
72 | extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*); | 71 | extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*); |
73 | extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**); | 72 | extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**); |
74 | extern ino_t minix_inode_by_name(struct dentry*); | 73 | extern ino_t minix_inode_by_name(struct dentry*); |
75 | extern int minix_sync_file(struct file *, struct dentry *, int); | ||
76 | 74 | ||
77 | extern const struct inode_operations minix_file_inode_operations; | 75 | extern const struct inode_operations minix_file_inode_operations; |
78 | extern const struct inode_operations minix_dir_inode_operations; | 76 | extern const struct inode_operations minix_dir_inode_operations; |
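
The minix conversion above also folds minix_sync_inode() into minix_write_inode(), which now honours the wait flag that ->write_inode has always carried: without wait it just updates the on-disk inode buffer, with wait it forces the buffer out and reports I/O errors. The contract, sketched with a hypothetical update helper:

    static int examplefs_write_inode(struct inode *inode, int wait)
    {
            struct buffer_head *bh = examplefs_update_inode(inode); /* assumed */
            int err = 0;

            if (!bh)
                    return -EIO;
            if (wait && buffer_dirty(bh)) {
                    sync_dirty_buffer(bh);          /* block until written */
                    if (buffer_req(bh) && !buffer_uptodate(bh))
                            err = -EIO;
            }
            brelse(bh);
            return err;
    }
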
diff --git a/fs/mpage.c b/fs/mpage.c index 680ba60863ff..42381bd6543b 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, | |||
379 | struct buffer_head map_bh; | 379 | struct buffer_head map_bh; |
380 | unsigned long first_logical_block = 0; | 380 | unsigned long first_logical_block = 0; |
381 | 381 | ||
382 | clear_buffer_mapped(&map_bh); | 382 | map_bh.b_state = 0; |
383 | map_bh.b_size = 0; | ||
383 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 384 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
384 | struct page *page = list_entry(pages->prev, struct page, lru); | 385 | struct page *page = list_entry(pages->prev, struct page, lru); |
385 | 386 | ||
@@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block) | |||
412 | struct buffer_head map_bh; | 413 | struct buffer_head map_bh; |
413 | unsigned long first_logical_block = 0; | 414 | unsigned long first_logical_block = 0; |
414 | 415 | ||
415 | clear_buffer_mapped(&map_bh); | 416 | map_bh.b_state = 0; |
417 | map_bh.b_size = 0; | ||
416 | bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, | 418 | bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, |
417 | &map_bh, &first_logical_block, get_block); | 419 | &map_bh, &first_logical_block, get_block); |
418 | if (bio) | 420 | if (bio) |
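
The mpage change replaces clear_buffer_mapped() on the stack-allocated map_bh: that call only clears the BH_Mapped bit, leaving the rest of b_state and, crucially, b_size holding stack garbage that do_mpage_readpage() goes on to consult (this is the apparent motivation for the change). Zeroing both fields gives the helper a well-defined starting point, as in this sketch:

    static void example_init_map_bh(struct buffer_head *map_bh)
    {
            map_bh->b_state = 0;    /* no stale BH_* bits from the stack */
            map_bh->b_size = 0;     /* mapped extent length starts out empty */
    }
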
diff --git a/fs/namei.c b/fs/namei.c index 967c3db92724..527119afb6a5 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -552,6 +552,17 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd | |||
552 | return result; | 552 | return result; |
553 | } | 553 | } |
554 | 554 | ||
555 | static __always_inline void set_root(struct nameidata *nd) | ||
556 | { | ||
557 | if (!nd->root.mnt) { | ||
558 | struct fs_struct *fs = current->fs; | ||
559 | read_lock(&fs->lock); | ||
560 | nd->root = fs->root; | ||
561 | path_get(&nd->root); | ||
562 | read_unlock(&fs->lock); | ||
563 | } | ||
564 | } | ||
565 | |||
555 | static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) | 566 | static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) |
556 | { | 567 | { |
557 | int res = 0; | 568 | int res = 0; |
@@ -560,14 +571,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l | |||
560 | goto fail; | 571 | goto fail; |
561 | 572 | ||
562 | if (*link == '/') { | 573 | if (*link == '/') { |
563 | struct fs_struct *fs = current->fs; | 574 | set_root(nd); |
564 | |||
565 | path_put(&nd->path); | 575 | path_put(&nd->path); |
566 | 576 | nd->path = nd->root; | |
567 | read_lock(&fs->lock); | 577 | path_get(&nd->root); |
568 | nd->path = fs->root; | ||
569 | path_get(&fs->root); | ||
570 | read_unlock(&fs->lock); | ||
571 | } | 578 | } |
572 | 579 | ||
573 | res = link_path_walk(link, nd); | 580 | res = link_path_walk(link, nd); |
@@ -668,23 +675,23 @@ loop: | |||
668 | return err; | 675 | return err; |
669 | } | 676 | } |
670 | 677 | ||
671 | int follow_up(struct vfsmount **mnt, struct dentry **dentry) | 678 | int follow_up(struct path *path) |
672 | { | 679 | { |
673 | struct vfsmount *parent; | 680 | struct vfsmount *parent; |
674 | struct dentry *mountpoint; | 681 | struct dentry *mountpoint; |
675 | spin_lock(&vfsmount_lock); | 682 | spin_lock(&vfsmount_lock); |
676 | parent=(*mnt)->mnt_parent; | 683 | parent = path->mnt->mnt_parent; |
677 | if (parent == *mnt) { | 684 | if (parent == path->mnt) { |
678 | spin_unlock(&vfsmount_lock); | 685 | spin_unlock(&vfsmount_lock); |
679 | return 0; | 686 | return 0; |
680 | } | 687 | } |
681 | mntget(parent); | 688 | mntget(parent); |
682 | mountpoint=dget((*mnt)->mnt_mountpoint); | 689 | mountpoint = dget(path->mnt->mnt_mountpoint); |
683 | spin_unlock(&vfsmount_lock); | 690 | spin_unlock(&vfsmount_lock); |
684 | dput(*dentry); | 691 | dput(path->dentry); |
685 | *dentry = mountpoint; | 692 | path->dentry = mountpoint; |
686 | mntput(*mnt); | 693 | mntput(path->mnt); |
687 | *mnt = parent; | 694 | path->mnt = parent; |
688 | return 1; | 695 | return 1; |
689 | } | 696 | } |
690 | 697 | ||
@@ -695,7 +702,7 @@ static int __follow_mount(struct path *path) | |||
695 | { | 702 | { |
696 | int res = 0; | 703 | int res = 0; |
697 | while (d_mountpoint(path->dentry)) { | 704 | while (d_mountpoint(path->dentry)) { |
698 | struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); | 705 | struct vfsmount *mounted = lookup_mnt(path); |
699 | if (!mounted) | 706 | if (!mounted) |
700 | break; | 707 | break; |
701 | dput(path->dentry); | 708 | dput(path->dentry); |
@@ -708,32 +715,32 @@ static int __follow_mount(struct path *path) | |||
708 | return res; | 715 | return res; |
709 | } | 716 | } |
710 | 717 | ||
711 | static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) | 718 | static void follow_mount(struct path *path) |
712 | { | 719 | { |
713 | while (d_mountpoint(*dentry)) { | 720 | while (d_mountpoint(path->dentry)) { |
714 | struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); | 721 | struct vfsmount *mounted = lookup_mnt(path); |
715 | if (!mounted) | 722 | if (!mounted) |
716 | break; | 723 | break; |
717 | dput(*dentry); | 724 | dput(path->dentry); |
718 | mntput(*mnt); | 725 | mntput(path->mnt); |
719 | *mnt = mounted; | 726 | path->mnt = mounted; |
720 | *dentry = dget(mounted->mnt_root); | 727 | path->dentry = dget(mounted->mnt_root); |
721 | } | 728 | } |
722 | } | 729 | } |
723 | 730 | ||
724 | /* no need for dcache_lock, as serialization is taken care in | 731 | /* no need for dcache_lock, as serialization is taken care in |
725 | * namespace.c | 732 | * namespace.c |
726 | */ | 733 | */ |
727 | int follow_down(struct vfsmount **mnt, struct dentry **dentry) | 734 | int follow_down(struct path *path) |
728 | { | 735 | { |
729 | struct vfsmount *mounted; | 736 | struct vfsmount *mounted; |
730 | 737 | ||
731 | mounted = lookup_mnt(*mnt, *dentry); | 738 | mounted = lookup_mnt(path); |
732 | if (mounted) { | 739 | if (mounted) { |
733 | dput(*dentry); | 740 | dput(path->dentry); |
734 | mntput(*mnt); | 741 | mntput(path->mnt); |
735 | *mnt = mounted; | 742 | path->mnt = mounted; |
736 | *dentry = dget(mounted->mnt_root); | 743 | path->dentry = dget(mounted->mnt_root); |
737 | return 1; | 744 | return 1; |
738 | } | 745 | } |
739 | return 0; | 746 | return 0; |
@@ -741,19 +748,16 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) | |||
741 | 748 | ||
742 | static __always_inline void follow_dotdot(struct nameidata *nd) | 749 | static __always_inline void follow_dotdot(struct nameidata *nd) |
743 | { | 750 | { |
744 | struct fs_struct *fs = current->fs; | 751 | set_root(nd); |
745 | 752 | ||
746 | while(1) { | 753 | while(1) { |
747 | struct vfsmount *parent; | 754 | struct vfsmount *parent; |
748 | struct dentry *old = nd->path.dentry; | 755 | struct dentry *old = nd->path.dentry; |
749 | 756 | ||
750 | read_lock(&fs->lock); | 757 | if (nd->path.dentry == nd->root.dentry && |
751 | if (nd->path.dentry == fs->root.dentry && | 758 | nd->path.mnt == nd->root.mnt) { |
752 | nd->path.mnt == fs->root.mnt) { | ||
753 | read_unlock(&fs->lock); | ||
754 | break; | 759 | break; |
755 | } | 760 | } |
756 | read_unlock(&fs->lock); | ||
757 | spin_lock(&dcache_lock); | 761 | spin_lock(&dcache_lock); |
758 | if (nd->path.dentry != nd->path.mnt->mnt_root) { | 762 | if (nd->path.dentry != nd->path.mnt->mnt_root) { |
759 | nd->path.dentry = dget(nd->path.dentry->d_parent); | 763 | nd->path.dentry = dget(nd->path.dentry->d_parent); |
@@ -775,7 +779,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd) | |||
775 | mntput(nd->path.mnt); | 779 | mntput(nd->path.mnt); |
776 | nd->path.mnt = parent; | 780 | nd->path.mnt = parent; |
777 | } | 781 | } |
778 | follow_mount(&nd->path.mnt, &nd->path.dentry); | 782 | follow_mount(&nd->path); |
779 | } | 783 | } |
780 | 784 | ||
781 | /* | 785 | /* |
@@ -853,7 +857,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd) | |||
853 | err = inode_permission(nd->path.dentry->d_inode, | 857 | err = inode_permission(nd->path.dentry->d_inode, |
854 | MAY_EXEC); | 858 | MAY_EXEC); |
855 | if (!err) | 859 | if (!err) |
856 | err = ima_path_check(&nd->path, MAY_EXEC); | 860 | err = ima_path_check(&nd->path, MAY_EXEC, |
861 | IMA_COUNT_UPDATE); | ||
857 | if (err) | 862 | if (err) |
858 | break; | 863 | break; |
859 | 864 | ||
@@ -1016,25 +1021,23 @@ static int path_walk(const char *name, struct nameidata *nd) | |||
1016 | return link_path_walk(name, nd); | 1021 | return link_path_walk(name, nd); |
1017 | } | 1022 | } |
1018 | 1023 | ||
1019 | /* Returns 0 and nd will be valid on success; Returns error otherwise. */ | 1024 | static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1020 | static int do_path_lookup(int dfd, const char *name, | ||
1021 | unsigned int flags, struct nameidata *nd) | ||
1022 | { | 1025 | { |
1023 | int retval = 0; | 1026 | int retval = 0; |
1024 | int fput_needed; | 1027 | int fput_needed; |
1025 | struct file *file; | 1028 | struct file *file; |
1026 | struct fs_struct *fs = current->fs; | ||
1027 | 1029 | ||
1028 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | 1030 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ |
1029 | nd->flags = flags; | 1031 | nd->flags = flags; |
1030 | nd->depth = 0; | 1032 | nd->depth = 0; |
1033 | nd->root.mnt = NULL; | ||
1031 | 1034 | ||
1032 | if (*name=='/') { | 1035 | if (*name=='/') { |
1033 | read_lock(&fs->lock); | 1036 | set_root(nd); |
1034 | nd->path = fs->root; | 1037 | nd->path = nd->root; |
1035 | path_get(&fs->root); | 1038 | path_get(&nd->root); |
1036 | read_unlock(&fs->lock); | ||
1037 | } else if (dfd == AT_FDCWD) { | 1039 | } else if (dfd == AT_FDCWD) { |
1040 | struct fs_struct *fs = current->fs; | ||
1038 | read_lock(&fs->lock); | 1041 | read_lock(&fs->lock); |
1039 | nd->path = fs->pwd; | 1042 | nd->path = fs->pwd; |
1040 | path_get(&fs->pwd); | 1043 | path_get(&fs->pwd); |
@@ -1062,17 +1065,29 @@ static int do_path_lookup(int dfd, const char *name, | |||
1062 | 1065 | ||
1063 | fput_light(file, fput_needed); | 1066 | fput_light(file, fput_needed); |
1064 | } | 1067 | } |
1068 | return 0; | ||
1065 | 1069 | ||
1066 | retval = path_walk(name, nd); | 1070 | fput_fail: |
1071 | fput_light(file, fput_needed); | ||
1072 | out_fail: | ||
1073 | return retval; | ||
1074 | } | ||
1075 | |||
1076 | /* Returns 0 and nd will be valid on success; Returns error otherwise. */ | ||
1077 | static int do_path_lookup(int dfd, const char *name, | ||
1078 | unsigned int flags, struct nameidata *nd) | ||
1079 | { | ||
1080 | int retval = path_init(dfd, name, flags, nd); | ||
1081 | if (!retval) | ||
1082 | retval = path_walk(name, nd); | ||
1067 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | 1083 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && |
1068 | nd->path.dentry->d_inode)) | 1084 | nd->path.dentry->d_inode)) |
1069 | audit_inode(name, nd->path.dentry); | 1085 | audit_inode(name, nd->path.dentry); |
1070 | out_fail: | 1086 | if (nd->root.mnt) { |
1087 | path_put(&nd->root); | ||
1088 | nd->root.mnt = NULL; | ||
1089 | } | ||
1071 | return retval; | 1090 | return retval; |
1072 | |||
1073 | fput_fail: | ||
1074 | fput_light(file, fput_needed); | ||
1075 | goto out_fail; | ||
1076 | } | 1091 | } |
1077 | 1092 | ||
1078 | int path_lookup(const char *name, unsigned int flags, | 1093 | int path_lookup(const char *name, unsigned int flags, |
@@ -1112,14 +1127,18 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
1112 | nd->path.dentry = dentry; | 1127 | nd->path.dentry = dentry; |
1113 | nd->path.mnt = mnt; | 1128 | nd->path.mnt = mnt; |
1114 | path_get(&nd->path); | 1129 | path_get(&nd->path); |
1130 | nd->root = nd->path; | ||
1131 | path_get(&nd->root); | ||
1115 | 1132 | ||
1116 | retval = path_walk(name, nd); | 1133 | retval = path_walk(name, nd); |
1117 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | 1134 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && |
1118 | nd->path.dentry->d_inode)) | 1135 | nd->path.dentry->d_inode)) |
1119 | audit_inode(name, nd->path.dentry); | 1136 | audit_inode(name, nd->path.dentry); |
1120 | 1137 | ||
1121 | return retval; | 1138 | path_put(&nd->root); |
1139 | nd->root.mnt = NULL; | ||
1122 | 1140 | ||
1141 | return retval; | ||
1123 | } | 1142 | } |
1124 | 1143 | ||
1125 | /** | 1144 | /** |
@@ -1515,7 +1534,8 @@ int may_open(struct path *path, int acc_mode, int flag) | |||
1515 | return error; | 1534 | return error; |
1516 | 1535 | ||
1517 | error = ima_path_check(path, | 1536 | error = ima_path_check(path, |
1518 | acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); | 1537 | acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), |
1538 | IMA_COUNT_UPDATE); | ||
1519 | if (error) | 1539 | if (error) |
1520 | return error; | 1540 | return error; |
1521 | /* | 1541 | /* |
@@ -1674,9 +1694,14 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1674 | /* | 1694 | /* |
1675 | * Create - we need to know the parent. | 1695 | * Create - we need to know the parent. |
1676 | */ | 1696 | */ |
1677 | error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); | 1697 | error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); |
1678 | if (error) | 1698 | if (error) |
1679 | return ERR_PTR(error); | 1699 | return ERR_PTR(error); |
1700 | error = path_walk(pathname, &nd); | ||
1701 | if (error) | ||
1702 | return ERR_PTR(error); | ||
1703 | if (unlikely(!audit_dummy_context())) | ||
1704 | audit_inode(pathname, nd.path.dentry); | ||
1680 | 1705 | ||
1681 | /* | 1706 | /* |
1682 | * We have the parent and last component. First of all, check | 1707 | * We have the parent and last component. First of all, check |
@@ -1804,6 +1829,8 @@ exit: | |||
1804 | if (!IS_ERR(nd.intent.open.file)) | 1829 | if (!IS_ERR(nd.intent.open.file)) |
1805 | release_open_intent(&nd); | 1830 | release_open_intent(&nd); |
1806 | exit_parent: | 1831 | exit_parent: |
1832 | if (nd.root.mnt) | ||
1833 | path_put(&nd.root); | ||
1807 | path_put(&nd.path); | 1834 | path_put(&nd.path); |
1808 | return ERR_PTR(error); | 1835 | return ERR_PTR(error); |
1809 | 1836 | ||
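
The namei changes cache the process root in the nameidata: path_init() starts with nd->root.mnt == NULL, set_root() fills it (under fs->lock) the first time an absolute path, absolute symlink or ".." needs it, and the lookup's caller drops the reference at the end. The lifecycle, condensed from the hunks above into one sketch:

    static int example_lookup(int dfd, const char *name,
                              unsigned int flags, struct nameidata *nd)
    {
            int retval = path_init(dfd, name, flags, nd); /* nd->root.mnt = NULL */
            if (!retval)
                    retval = path_walk(name, nd);  /* set_root() on demand */
            if (nd->root.mnt) {
                    path_put(&nd->root);           /* release the cached root */
                    nd->root.mnt = NULL;
            }
            return retval;
    }
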
diff --git a/fs/namespace.c b/fs/namespace.c index 134d494158d9..2dd333b0fe7f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
131 | INIT_LIST_HEAD(&mnt->mnt_share); | 131 | INIT_LIST_HEAD(&mnt->mnt_share); |
132 | INIT_LIST_HEAD(&mnt->mnt_slave_list); | 132 | INIT_LIST_HEAD(&mnt->mnt_slave_list); |
133 | INIT_LIST_HEAD(&mnt->mnt_slave); | 133 | INIT_LIST_HEAD(&mnt->mnt_slave); |
134 | atomic_set(&mnt->__mnt_writers, 0); | 134 | #ifdef CONFIG_SMP |
135 | mnt->mnt_writers = alloc_percpu(int); | ||
136 | if (!mnt->mnt_writers) | ||
137 | goto out_free_devname; | ||
138 | #else | ||
139 | mnt->mnt_writers = 0; | ||
140 | #endif | ||
135 | } | 141 | } |
136 | return mnt; | 142 | return mnt; |
137 | 143 | ||
144 | #ifdef CONFIG_SMP | ||
145 | out_free_devname: | ||
146 | kfree(mnt->mnt_devname); | ||
147 | #endif | ||
138 | out_free_id: | 148 | out_free_id: |
139 | mnt_free_id(mnt); | 149 | mnt_free_id(mnt); |
140 | out_free_cache: | 150 | out_free_cache: |
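On SMP the writer count is now a real per-CPU allocation, and alloc_vfsmnt gains a matching unwind label. The same staged-label constructor shape in plain C, with calloc standing in for alloc_percpu (toy types, not the kernel structures):

    #include <stdlib.h>
    #include <string.h>

    #define NCPU 4

    struct toy_mnt { char *devname; int *writers; };

    /* Each label frees what earlier steps took, in reverse order,
     * like out_free_devname / out_free_id in the hunk above. */
    struct toy_mnt *toy_alloc(const char *name)
    {
        struct toy_mnt *m = calloc(1, sizeof(*m));
        if (!m)
            goto out;
        m->devname = strdup(name);
        if (!m->devname)
            goto out_free_mnt;
        m->writers = calloc(NCPU, sizeof(int)); /* alloc_percpu(int) stand-in */
        if (!m->writers)
            goto out_free_devname;
        return m;

    out_free_devname:
        free(m->devname);
    out_free_mnt:
        free(m);
    out:
        return NULL;
    }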
@@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt) | |||
171 | } | 181 | } |
172 | EXPORT_SYMBOL_GPL(__mnt_is_readonly); | 182 | EXPORT_SYMBOL_GPL(__mnt_is_readonly); |
173 | 183 | ||
174 | struct mnt_writer { | 184 | static inline void inc_mnt_writers(struct vfsmount *mnt) |
175 | /* | 185 | { |
176 | * If holding multiple instances of this lock, they | 186 | #ifdef CONFIG_SMP |
177 | * must be ordered by cpu number. | 187 | (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; |
178 | */ | 188 | #else |
179 | spinlock_t lock; | 189 | mnt->mnt_writers++; |
180 | struct lock_class_key lock_class; /* compiles out with !lockdep */ | 190 | #endif |
181 | unsigned long count; | 191 | } |
182 | struct vfsmount *mnt; | ||
183 | } ____cacheline_aligned_in_smp; | ||
184 | static DEFINE_PER_CPU(struct mnt_writer, mnt_writers); | ||
185 | 192 | ||
186 | static int __init init_mnt_writers(void) | 193 | static inline void dec_mnt_writers(struct vfsmount *mnt) |
187 | { | 194 | { |
188 | int cpu; | 195 | #ifdef CONFIG_SMP |
189 | for_each_possible_cpu(cpu) { | 196 | (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; |
190 | struct mnt_writer *writer = &per_cpu(mnt_writers, cpu); | 197 | #else |
191 | spin_lock_init(&writer->lock); | 198 | mnt->mnt_writers--; |
192 | lockdep_set_class(&writer->lock, &writer->lock_class); | 199 | #endif |
193 | writer->count = 0; | ||
194 | } | ||
195 | return 0; | ||
196 | } | 200 | } |
197 | fs_initcall(init_mnt_writers); | ||
198 | 201 | ||
199 | static void unlock_mnt_writers(void) | 202 | static unsigned int count_mnt_writers(struct vfsmount *mnt) |
200 | { | 203 | { |
204 | #ifdef CONFIG_SMP | ||
205 | unsigned int count = 0; | ||
201 | int cpu; | 206 | int cpu; |
202 | struct mnt_writer *cpu_writer; | ||
203 | 207 | ||
204 | for_each_possible_cpu(cpu) { | 208 | for_each_possible_cpu(cpu) { |
205 | cpu_writer = &per_cpu(mnt_writers, cpu); | 209 | count += *per_cpu_ptr(mnt->mnt_writers, cpu); |
206 | spin_unlock(&cpu_writer->lock); | ||
207 | } | 210 | } |
208 | } | ||
209 | 211 | ||
210 | static inline void __clear_mnt_count(struct mnt_writer *cpu_writer) | 212 | return count; |
211 | { | 213 | #else |
212 | if (!cpu_writer->mnt) | 214 | return mnt->mnt_writers; |
213 | return; | 215 | #endif |
214 | /* | ||
215 | * This is in case anyone ever leaves an invalid, | ||
216 | * old ->mnt and a count of 0. | ||
217 | */ | ||
218 | if (!cpu_writer->count) | ||
219 | return; | ||
220 | atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers); | ||
221 | cpu_writer->count = 0; | ||
222 | } | ||
223 | /* | ||
224 | * must hold cpu_writer->lock | ||
225 | */ | ||
226 | static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, | ||
227 | struct vfsmount *mnt) | ||
228 | { | ||
229 | if (cpu_writer->mnt == mnt) | ||
230 | return; | ||
231 | __clear_mnt_count(cpu_writer); | ||
232 | cpu_writer->mnt = mnt; | ||
233 | } | 216 | } |
234 | 217 | ||
235 | /* | 218 | /* |
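inc and dec run on whatever CPU the caller happens to be on, so an individual per-CPU slot can legitimately go negative; only the cross-CPU sum that count_mnt_writers computes is meaningful. A small demonstration:

    #include <stdio.h>

    #define NCPU 4
    static int writers[NCPU];      /* stands in for mnt->mnt_writers */

    int count_writers(void)
    {
        int cpu, sum = 0;
        for (cpu = 0; cpu < NCPU; cpu++)
            sum += writers[cpu];   /* the count_mnt_writers() loop */
        return sum;
    }

    int main(void)
    {
        writers[0]++;              /* mnt_want_write() ran on CPU 0    */
        writers[2]--;              /* paired mnt_drop_write() on CPU 2 */
        writers[1]++;              /* a second, still-active writer    */
        /* slots are {1, 1, -1, 0}: individually meaningless, sum is 1 */
        printf("active writers: %d\n", count_writers());
        return 0;
    }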
@@ -253,74 +236,73 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, | |||
253 | int mnt_want_write(struct vfsmount *mnt) | 236 | int mnt_want_write(struct vfsmount *mnt) |
254 | { | 237 | { |
255 | int ret = 0; | 238 | int ret = 0; |
256 | struct mnt_writer *cpu_writer; | ||
257 | 239 | ||
258 | cpu_writer = &get_cpu_var(mnt_writers); | 240 | preempt_disable(); |
259 | spin_lock(&cpu_writer->lock); | 241 | inc_mnt_writers(mnt); |
242 | /* | ||
243 | * The store done by inc_mnt_writers must be visible before we enter the | ||
244 | * MNT_WRITE_HOLD loop below, so that the slowpath can see our | ||
245 | * incremented count after it has set MNT_WRITE_HOLD. | ||
246 | */ | ||
247 | smp_mb(); | ||
248 | while (mnt->mnt_flags & MNT_WRITE_HOLD) | ||
249 | cpu_relax(); | ||
250 | /* | ||
251 | * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will | ||
252 | * be set to match its requirements. So we must not load that until | ||
253 | * MNT_WRITE_HOLD is cleared. | ||
254 | */ | ||
255 | smp_rmb(); | ||
260 | if (__mnt_is_readonly(mnt)) { | 256 | if (__mnt_is_readonly(mnt)) { |
257 | dec_mnt_writers(mnt); | ||
261 | ret = -EROFS; | 258 | ret = -EROFS; |
262 | goto out; | 259 | goto out; |
263 | } | 260 | } |
264 | use_cpu_writer_for_mount(cpu_writer, mnt); | ||
265 | cpu_writer->count++; | ||
266 | out: | 261 | out: |
267 | spin_unlock(&cpu_writer->lock); | 262 | preempt_enable(); |
268 | put_cpu_var(mnt_writers); | ||
269 | return ret; | 263 | return ret; |
270 | } | 264 | } |
271 | EXPORT_SYMBOL_GPL(mnt_want_write); | 265 | EXPORT_SYMBOL_GPL(mnt_want_write); |
272 | 266 | ||
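The fast path above is ordering-critical: bump the local count, full barrier, spin while a remount holds MNT_WRITE_HOLD, read barrier, then test the readonly flag. A compressed C11-atomics analogue with one shared counter instead of per-CPU slots, so the barriers are what's visible, not the scalability:

    #include <stdatomic.h>
    #include <sched.h>

    #define MNT_WRITE_HOLD 0x1
    #define MNT_READONLY   0x2

    _Atomic unsigned mnt_flags;
    _Atomic int mnt_writers;

    int want_write(void)
    {
        atomic_fetch_add(&mnt_writers, 1);         /* inc_mnt_writers() */
        atomic_thread_fence(memory_order_seq_cst); /* smp_mb()          */
        while (atomic_load(&mnt_flags) & MNT_WRITE_HOLD)
            sched_yield();                         /* cpu_relax() spin  */
        atomic_thread_fence(memory_order_acquire); /* smp_rmb()         */
        if (atomic_load(&mnt_flags) & MNT_READONLY) {
            atomic_fetch_sub(&mnt_writers, 1);     /* dec_mnt_writers() */
            return -30;                            /* -EROFS            */
        }
        return 0;
    }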
273 | static void lock_mnt_writers(void) | 267 | /** |
274 | { | 268 | * mnt_clone_write - get write access to a mount |
275 | int cpu; | 269 | * @mnt: the mount on which to take a write |
276 | struct mnt_writer *cpu_writer; | 270 | * |
277 | 271 | * This is effectively like mnt_want_write, except | |
278 | for_each_possible_cpu(cpu) { | 272 | * it must only be used to take an extra write reference |
279 | cpu_writer = &per_cpu(mnt_writers, cpu); | 273 | * on a mountpoint that we already know has a write reference |
280 | spin_lock(&cpu_writer->lock); | 274 | * on it. This allows some optimisation. |
281 | __clear_mnt_count(cpu_writer); | 275 | * |
282 | cpu_writer->mnt = NULL; | 276 | * When finished, mnt_drop_write must be called as usual to |
283 | } | 277 | * drop the reference. |
278 | */ | ||
279 | int mnt_clone_write(struct vfsmount *mnt) | ||
280 | { | ||
281 | /* superblock may be r/o */ | ||
282 | if (__mnt_is_readonly(mnt)) | ||
283 | return -EROFS; | ||
284 | preempt_disable(); | ||
285 | inc_mnt_writers(mnt); | ||
286 | preempt_enable(); | ||
287 | return 0; | ||
284 | } | 288 | } |
289 | EXPORT_SYMBOL_GPL(mnt_clone_write); | ||
285 | 290 | ||
286 | /* | 291 | /** |
287 | * These per-cpu write counts are not guaranteed to have | 292 | * mnt_want_write_file - get write access to a file's mount |
288 | * matched increments and decrements on any given cpu. | 293 | * @file: the file whose mount to take a write on |
289 | * A file open()ed for write on one cpu and close()d on | 294 | * |
290 | * another cpu will imbalance this count. Make sure it | 295 | * This is like mnt_want_write, but it takes a file and can |
291 | * does not get too far out of whack. | 296 | * do some optimisations if the file is open for write already |
292 | */ | 297 | */ |
293 | static void handle_write_count_underflow(struct vfsmount *mnt) | 298 | int mnt_want_write_file(struct file *file) |
294 | { | 299 | { |
295 | if (atomic_read(&mnt->__mnt_writers) >= | 300 | if (!(file->f_mode & FMODE_WRITE)) |
296 | MNT_WRITER_UNDERFLOW_LIMIT) | 301 | return mnt_want_write(file->f_path.mnt); |
297 | return; | 302 | else |
298 | /* | 303 | return mnt_clone_write(file->f_path.mnt); |
299 | * It isn't necessary to hold all of the locks | ||
300 | * at the same time, but doing it this way makes | ||
301 | * us share a lot more code. | ||
302 | */ | ||
303 | lock_mnt_writers(); | ||
304 | /* | ||
305 | * vfsmount_lock is for mnt_flags. | ||
306 | */ | ||
307 | spin_lock(&vfsmount_lock); | ||
308 | /* | ||
309 | * If coalescing the per-cpu writer counts did not | ||
310 | * get us back to a positive writer count, we have | ||
311 | * a bug. | ||
312 | */ | ||
313 | if ((atomic_read(&mnt->__mnt_writers) < 0) && | ||
314 | !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { | ||
315 | WARN(1, KERN_DEBUG "leak detected on mount(%p) writers " | ||
316 | "count: %d\n", | ||
317 | mnt, atomic_read(&mnt->__mnt_writers)); | ||
318 | /* use the flag to keep the dmesg spam down */ | ||
319 | mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; | ||
320 | } | ||
321 | spin_unlock(&vfsmount_lock); | ||
322 | unlock_mnt_writers(); | ||
323 | } | 304 | } |
305 | EXPORT_SYMBOL_GPL(mnt_want_write_file); | ||
324 | 306 | ||
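mnt_want_write_file leans on an invariant rather than on locking: a file already open for write implies its mount cannot have gone read-only underneath it, so the cheaper mnt_clone_write suffices. The decision in miniature, with toy stubs for the two paths:

    struct toy_file { int fmode_write; };

    int full_want_write(void)   { return 0; } /* mnt_want_write() stand-in  */
    int cheap_clone_write(void) { return 0; } /* mnt_clone_write() stand-in */

    int want_write_file(struct toy_file *f)
    {
        if (!f->fmode_write)
            return full_want_write();  /* no guarantee: take the slow path */
        return cheap_clone_write();    /* open for write: skip HOLD dance  */
    }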
325 | /** | 307 | /** |
326 | * mnt_drop_write - give up write access to a mount | 308 | * mnt_drop_write - give up write access to a mount |
@@ -332,37 +314,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt) | |||
332 | */ | 314 | */ |
333 | void mnt_drop_write(struct vfsmount *mnt) | 315 | void mnt_drop_write(struct vfsmount *mnt) |
334 | { | 316 | { |
335 | int must_check_underflow = 0; | 317 | preempt_disable(); |
336 | struct mnt_writer *cpu_writer; | 318 | dec_mnt_writers(mnt); |
337 | 319 | preempt_enable(); | |
338 | cpu_writer = &get_cpu_var(mnt_writers); | ||
339 | spin_lock(&cpu_writer->lock); | ||
340 | |||
341 | use_cpu_writer_for_mount(cpu_writer, mnt); | ||
342 | if (cpu_writer->count > 0) { | ||
343 | cpu_writer->count--; | ||
344 | } else { | ||
345 | must_check_underflow = 1; | ||
346 | atomic_dec(&mnt->__mnt_writers); | ||
347 | } | ||
348 | |||
349 | spin_unlock(&cpu_writer->lock); | ||
350 | /* | ||
351 | * Logically, we could call this each time, | ||
352 | * but the __mnt_writers cacheline tends to | ||
353 | * be cold, and makes this expensive. | ||
354 | */ | ||
355 | if (must_check_underflow) | ||
356 | handle_write_count_underflow(mnt); | ||
357 | /* | ||
358 | * This could be done right after the spinlock | ||
359 | * is taken because the spinlock keeps us on | ||
360 | * the cpu, and disables preemption. However, | ||
361 | * putting it here bounds the amount that | ||
362 | * __mnt_writers can underflow. Without it, | ||
363 | * we could theoretically wrap __mnt_writers. | ||
364 | */ | ||
365 | put_cpu_var(mnt_writers); | ||
366 | } | 320 | } |
367 | EXPORT_SYMBOL_GPL(mnt_drop_write); | 321 | EXPORT_SYMBOL_GPL(mnt_drop_write); |
368 | 322 | ||
@@ -370,24 +324,41 @@ static int mnt_make_readonly(struct vfsmount *mnt) | |||
370 | { | 324 | { |
371 | int ret = 0; | 325 | int ret = 0; |
372 | 326 | ||
373 | lock_mnt_writers(); | 327 | spin_lock(&vfsmount_lock); |
328 | mnt->mnt_flags |= MNT_WRITE_HOLD; | ||
374 | /* | 329 | /* |
375 | * With all the locks held, this value is stable | 330 | * After storing MNT_WRITE_HOLD, we'll read the counters. This store |
331 | * should be visible before we do. | ||
376 | */ | 332 | */ |
377 | if (atomic_read(&mnt->__mnt_writers) > 0) { | 333 | smp_mb(); |
378 | ret = -EBUSY; | 334 | |
379 | goto out; | ||
380 | } | ||
381 | /* | 335 | /* |
382 | * nobody can do a successful mnt_want_write() with all | 336 | * With writers on hold, if this value is zero, then there are |
383 | * of the counts in MNT_DENIED_WRITE and the locks held. | 337 | * definitely no active writers (although held writers may subsequently |
338 | * increment the count, they'll have to wait, and decrement it after | ||
339 | * seeing MNT_READONLY). | ||
340 | * | ||
341 | * It is OK to have counter incremented on one CPU and decremented on | ||
342 | * another: the sum will add up correctly. The danger would be when we | ||
343 | * sum up each counter, if we read a counter before it is incremented, | ||
344 | * but then read another CPU's count from which it has subsequently | ||
345 | * been decremented -- we would see more decrements than we should. | ||
346 | * MNT_WRITE_HOLD protects against this scenario, because | ||
347 | * mnt_want_write first increments count, then smp_mb, then spins on | ||
348 | * MNT_WRITE_HOLD, so it can't be decremented by another CPU while | ||
349 | * we're counting up here. | ||
384 | */ | 350 | */ |
385 | spin_lock(&vfsmount_lock); | 351 | if (count_mnt_writers(mnt) > 0) |
386 | if (!ret) | 352 | ret = -EBUSY; |
353 | else | ||
387 | mnt->mnt_flags |= MNT_READONLY; | 354 | mnt->mnt_flags |= MNT_READONLY; |
355 | /* | ||
356 | * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers | ||
357 | * that become unheld will see MNT_READONLY. | ||
358 | */ | ||
359 | smp_wmb(); | ||
360 | mnt->mnt_flags &= ~MNT_WRITE_HOLD; | ||
388 | spin_unlock(&vfsmount_lock); | 361 | spin_unlock(&vfsmount_lock); |
389 | out: | ||
390 | unlock_mnt_writers(); | ||
391 | return ret; | 362 | return ret; |
392 | } | 363 | } |
393 | 364 | ||
@@ -410,6 +381,9 @@ void free_vfsmnt(struct vfsmount *mnt) | |||
410 | { | 381 | { |
411 | kfree(mnt->mnt_devname); | 382 | kfree(mnt->mnt_devname); |
412 | mnt_free_id(mnt); | 383 | mnt_free_id(mnt); |
384 | #ifdef CONFIG_SMP | ||
385 | free_percpu(mnt->mnt_writers); | ||
386 | #endif | ||
413 | kmem_cache_free(mnt_cache, mnt); | 387 | kmem_cache_free(mnt_cache, mnt); |
414 | } | 388 | } |
415 | 389 | ||
@@ -442,11 +416,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, | |||
442 | * lookup_mnt increments the ref count before returning | 416 | * lookup_mnt increments the ref count before returning |
443 | * the vfsmount struct. | 417 | * the vfsmount struct. |
444 | */ | 418 | */ |
445 | struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) | 419 | struct vfsmount *lookup_mnt(struct path *path) |
446 | { | 420 | { |
447 | struct vfsmount *child_mnt; | 421 | struct vfsmount *child_mnt; |
448 | spin_lock(&vfsmount_lock); | 422 | spin_lock(&vfsmount_lock); |
449 | if ((child_mnt = __lookup_mnt(mnt, dentry, 1))) | 423 | if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) |
450 | mntget(child_mnt); | 424 | mntget(child_mnt); |
451 | spin_unlock(&vfsmount_lock); | 425 | spin_unlock(&vfsmount_lock); |
452 | return child_mnt; | 426 | return child_mnt; |
@@ -604,38 +578,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
604 | 578 | ||
605 | static inline void __mntput(struct vfsmount *mnt) | 579 | static inline void __mntput(struct vfsmount *mnt) |
606 | { | 580 | { |
607 | int cpu; | ||
608 | struct super_block *sb = mnt->mnt_sb; | 581 | struct super_block *sb = mnt->mnt_sb; |
609 | /* | 582 | /* |
610 | * We don't have to hold all of the locks at the | ||
611 | * same time here because we know that we're the | ||
612 | * last reference to mnt and that no new writers | ||
613 | * can come in. | ||
614 | */ | ||
615 | for_each_possible_cpu(cpu) { | ||
616 | struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); | ||
617 | spin_lock(&cpu_writer->lock); | ||
618 | if (cpu_writer->mnt != mnt) { | ||
619 | spin_unlock(&cpu_writer->lock); | ||
620 | continue; | ||
621 | } | ||
622 | atomic_add(cpu_writer->count, &mnt->__mnt_writers); | ||
623 | cpu_writer->count = 0; | ||
624 | /* | ||
625 | * Might as well do this so that no one | ||
626 | * ever sees the pointer and expects | ||
627 | * it to be valid. | ||
628 | */ | ||
629 | cpu_writer->mnt = NULL; | ||
630 | spin_unlock(&cpu_writer->lock); | ||
631 | } | ||
632 | /* | ||
633 | * This probably indicates that somebody messed | 583 | * This probably indicates that somebody messed |
634 | * up a mnt_want/drop_write() pair. If this | 584 | * up a mnt_want/drop_write() pair. If this |
635 | * happens, the filesystem was probably unable | 585 | * happens, the filesystem was probably unable |
636 | * to make r/w->r/o transitions. | 586 | * to make r/w->r/o transitions. |
637 | */ | 587 | */ |
638 | WARN_ON(atomic_read(&mnt->__mnt_writers)); | 588 | /* |
589 | * atomic_dec_and_lock() used to deal with ->mnt_count decrements | ||
590 | * provides barriers, so count_mnt_writers() below is safe. AV | ||
591 | */ | ||
592 | WARN_ON(count_mnt_writers(mnt)); | ||
639 | dput(mnt->mnt_root); | 593 | dput(mnt->mnt_root); |
640 | free_vfsmnt(mnt); | 594 | free_vfsmnt(mnt); |
641 | deactivate_super(sb); | 595 | deactivate_super(sb); |
@@ -1106,11 +1060,8 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
1106 | * we just try to remount it readonly. | 1060 | * we just try to remount it readonly. |
1107 | */ | 1061 | */ |
1108 | down_write(&sb->s_umount); | 1062 | down_write(&sb->s_umount); |
1109 | if (!(sb->s_flags & MS_RDONLY)) { | 1063 | if (!(sb->s_flags & MS_RDONLY)) |
1110 | lock_kernel(); | ||
1111 | retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); | 1064 | retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); |
1112 | unlock_kernel(); | ||
1113 | } | ||
1114 | up_write(&sb->s_umount); | 1065 | up_write(&sb->s_umount); |
1115 | return retval; | 1066 | return retval; |
1116 | } | 1067 | } |
@@ -1253,11 +1204,11 @@ Enomem: | |||
1253 | return NULL; | 1204 | return NULL; |
1254 | } | 1205 | } |
1255 | 1206 | ||
1256 | struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) | 1207 | struct vfsmount *collect_mounts(struct path *path) |
1257 | { | 1208 | { |
1258 | struct vfsmount *tree; | 1209 | struct vfsmount *tree; |
1259 | down_write(&namespace_sem); | 1210 | down_write(&namespace_sem); |
1260 | tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); | 1211 | tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE); |
1261 | up_write(&namespace_sem); | 1212 | up_write(&namespace_sem); |
1262 | return tree; | 1213 | return tree; |
1263 | } | 1214 | } |
@@ -1430,7 +1381,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) | |||
1430 | goto out_unlock; | 1381 | goto out_unlock; |
1431 | 1382 | ||
1432 | err = -ENOENT; | 1383 | err = -ENOENT; |
1433 | if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry)) | 1384 | if (!d_unlinked(path->dentry)) |
1434 | err = attach_recursive_mnt(mnt, path, NULL); | 1385 | err = attach_recursive_mnt(mnt, path, NULL); |
1435 | out_unlock: | 1386 | out_unlock: |
1436 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1387 | mutex_unlock(&path->dentry->d_inode->i_mutex); |
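graft_tree, do_move_mount and pivot_root (below) all collapse their root/unhashed tests into one predicate. Assuming the usual definition of the new helper, which this diff does not show, the old IS_ROOT(d) || !d_unhashed(d) is exactly !d_unlinked(d) by De Morgan:

    struct toy_dentry { int hashed; int is_root; };

    /* Assumed definition of d_unlinked() (not part of this diff):
     * unhashed, and not the root of its tree. */
    int d_unlinked_toy(const struct toy_dentry *d)
    {
        return !d->hashed && !d->is_root;
    }

    /* old test:  d->is_root || d->hashed
     * negated:   !d->is_root && !d->hashed  ==  d_unlinked_toy(d) */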
@@ -1601,7 +1552,7 @@ static int do_move_mount(struct path *path, char *old_name) | |||
1601 | 1552 | ||
1602 | down_write(&namespace_sem); | 1553 | down_write(&namespace_sem); |
1603 | while (d_mountpoint(path->dentry) && | 1554 | while (d_mountpoint(path->dentry) && |
1604 | follow_down(&path->mnt, &path->dentry)) | 1555 | follow_down(path)) |
1605 | ; | 1556 | ; |
1606 | err = -EINVAL; | 1557 | err = -EINVAL; |
1607 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) | 1558 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) |
@@ -1612,7 +1563,7 @@ static int do_move_mount(struct path *path, char *old_name) | |||
1612 | if (IS_DEADDIR(path->dentry->d_inode)) | 1563 | if (IS_DEADDIR(path->dentry->d_inode)) |
1613 | goto out1; | 1564 | goto out1; |
1614 | 1565 | ||
1615 | if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry)) | 1566 | if (d_unlinked(path->dentry)) |
1616 | goto out1; | 1567 | goto out1; |
1617 | 1568 | ||
1618 | err = -EINVAL; | 1569 | err = -EINVAL; |
@@ -1676,7 +1627,9 @@ static int do_new_mount(struct path *path, char *type, int flags, | |||
1676 | if (!capable(CAP_SYS_ADMIN)) | 1627 | if (!capable(CAP_SYS_ADMIN)) |
1677 | return -EPERM; | 1628 | return -EPERM; |
1678 | 1629 | ||
1630 | lock_kernel(); | ||
1679 | mnt = do_kern_mount(type, flags, name, data); | 1631 | mnt = do_kern_mount(type, flags, name, data); |
1632 | unlock_kernel(); | ||
1680 | if (IS_ERR(mnt)) | 1633 | if (IS_ERR(mnt)) |
1681 | return PTR_ERR(mnt); | 1634 | return PTR_ERR(mnt); |
1682 | 1635 | ||
@@ -1695,10 +1648,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, | |||
1695 | down_write(&namespace_sem); | 1648 | down_write(&namespace_sem); |
1696 | /* Something was mounted here while we slept */ | 1649 | /* Something was mounted here while we slept */ |
1697 | while (d_mountpoint(path->dentry) && | 1650 | while (d_mountpoint(path->dentry) && |
1698 | follow_down(&path->mnt, &path->dentry)) | 1651 | follow_down(path)) |
1699 | ; | 1652 | ; |
1700 | err = -EINVAL; | 1653 | err = -EINVAL; |
1701 | if (!check_mnt(path->mnt)) | 1654 | if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) |
1702 | goto unlock; | 1655 | goto unlock; |
1703 | 1656 | ||
1704 | /* Refuse the same filesystem on the same mount point */ | 1657 | /* Refuse the same filesystem on the same mount point */ |
@@ -2092,10 +2045,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, | |||
2092 | if (retval < 0) | 2045 | if (retval < 0) |
2093 | goto out3; | 2046 | goto out3; |
2094 | 2047 | ||
2095 | lock_kernel(); | ||
2096 | retval = do_mount((char *)dev_page, dir_page, (char *)type_page, | 2048 | retval = do_mount((char *)dev_page, dir_page, (char *)type_page, |
2097 | flags, (void *)data_page); | 2049 | flags, (void *)data_page); |
2098 | unlock_kernel(); | ||
2099 | free_page(data_page); | 2050 | free_page(data_page); |
2100 | 2051 | ||
2101 | out3: | 2052 | out3: |
@@ -2175,9 +2126,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2175 | error = -ENOENT; | 2126 | error = -ENOENT; |
2176 | if (IS_DEADDIR(new.dentry->d_inode)) | 2127 | if (IS_DEADDIR(new.dentry->d_inode)) |
2177 | goto out2; | 2128 | goto out2; |
2178 | if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry)) | 2129 | if (d_unlinked(new.dentry)) |
2179 | goto out2; | 2130 | goto out2; |
2180 | if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry)) | 2131 | if (d_unlinked(old.dentry)) |
2181 | goto out2; | 2132 | goto out2; |
2182 | error = -EBUSY; | 2133 | error = -EBUSY; |
2183 | if (new.mnt == root.mnt || | 2134 | if (new.mnt == root.mnt || |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d642f0e5b365..b99ce205b1bd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -736,6 +736,8 @@ static void ncp_put_super(struct super_block *sb) | |||
736 | { | 736 | { |
737 | struct ncp_server *server = NCP_SBP(sb); | 737 | struct ncp_server *server = NCP_SBP(sb); |
738 | 738 | ||
739 | lock_kernel(); | ||
740 | |||
739 | ncp_lock_server(server); | 741 | ncp_lock_server(server); |
740 | ncp_disconnect(server); | 742 | ncp_disconnect(server); |
741 | ncp_unlock_server(server); | 743 | ncp_unlock_server(server); |
@@ -769,6 +771,8 @@ static void ncp_put_super(struct super_block *sb) | |||
769 | vfree(server->packet); | 771 | vfree(server->packet); |
770 | sb->s_fs_info = NULL; | 772 | sb->s_fs_info = NULL; |
771 | kfree(server); | 773 | kfree(server); |
774 | |||
775 | unlock_kernel(); | ||
772 | } | 776 | } |
773 | 777 | ||
774 | static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) | 778 | static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 64a288ee046d..f01caec84463 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -154,7 +154,7 @@ out_err: | |||
154 | goto out; | 154 | goto out; |
155 | out_follow: | 155 | out_follow: |
156 | while (d_mountpoint(nd->path.dentry) && | 156 | while (d_mountpoint(nd->path.dentry) && |
157 | follow_down(&nd->path.mnt, &nd->path.dentry)) | 157 | follow_down(&nd->path)) |
158 | ; | 158 | ; |
159 | err = 0; | 159 | err = 0; |
160 | goto out; | 160 | goto out; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d2d67781c579..26127b69a275 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1813,6 +1813,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) | |||
1813 | if (data == NULL) | 1813 | if (data == NULL) |
1814 | return -ENOMEM; | 1814 | return -ENOMEM; |
1815 | 1815 | ||
1816 | lock_kernel(); | ||
1816 | /* fill out struct with values from existing mount */ | 1817 | /* fill out struct with values from existing mount */ |
1817 | data->flags = nfss->flags; | 1818 | data->flags = nfss->flags; |
1818 | data->rsize = nfss->rsize; | 1819 | data->rsize = nfss->rsize; |
@@ -1837,6 +1838,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) | |||
1837 | error = nfs_compare_remount_data(nfss, data); | 1838 | error = nfs_compare_remount_data(nfss, data); |
1838 | out: | 1839 | out: |
1839 | kfree(data); | 1840 | kfree(data); |
1841 | unlock_kernel(); | ||
1840 | return error; | 1842 | return error; |
1841 | } | 1843 | } |
1842 | 1844 | ||
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5839b229cd0e..8b1f8efb4690 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -847,9 +847,8 @@ exp_get_fsid_key(svc_client *clp, int fsid) | |||
847 | return exp_find_key(clp, FSID_NUM, fsidv, NULL); | 847 | return exp_find_key(clp, FSID_NUM, fsidv, NULL); |
848 | } | 848 | } |
849 | 849 | ||
850 | static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, | 850 | static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, |
851 | struct dentry *dentry, | 851 | struct cache_req *reqp) |
852 | struct cache_req *reqp) | ||
853 | { | 852 | { |
854 | struct svc_export *exp, key; | 853 | struct svc_export *exp, key; |
855 | int err; | 854 | int err; |
@@ -858,8 +857,7 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, | |||
858 | return ERR_PTR(-ENOENT); | 857 | return ERR_PTR(-ENOENT); |
859 | 858 | ||
860 | key.ex_client = clp; | 859 | key.ex_client = clp; |
861 | key.ex_path.mnt = mnt; | 860 | key.ex_path = *path; |
862 | key.ex_path.dentry = dentry; | ||
863 | 861 | ||
864 | exp = svc_export_lookup(&key); | 862 | exp = svc_export_lookup(&key); |
865 | if (exp == NULL) | 863 | if (exp == NULL) |
@@ -873,24 +871,19 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, | |||
873 | /* | 871 | /* |
874 | * Find the export entry for a given dentry. | 872 | * Find the export entry for a given dentry. |
875 | */ | 873 | */ |
876 | static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt, | 874 | static struct svc_export *exp_parent(svc_client *clp, struct path *path) |
877 | struct dentry *dentry, | ||
878 | struct cache_req *reqp) | ||
879 | { | 875 | { |
880 | svc_export *exp; | 876 | struct dentry *saved = dget(path->dentry); |
881 | 877 | svc_export *exp = exp_get_by_name(clp, path, NULL); | |
882 | dget(dentry); | 878 | |
883 | exp = exp_get_by_name(clp, mnt, dentry, reqp); | 879 | while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { |
884 | 880 | struct dentry *parent = dget_parent(path->dentry); | |
885 | while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { | 881 | dput(path->dentry); |
886 | struct dentry *parent; | 882 | path->dentry = parent; |
887 | 883 | exp = exp_get_by_name(clp, path, NULL); | |
888 | parent = dget_parent(dentry); | ||
889 | dput(dentry); | ||
890 | dentry = parent; | ||
891 | exp = exp_get_by_name(clp, mnt, dentry, reqp); | ||
892 | } | 884 | } |
893 | dput(dentry); | 885 | dput(path->dentry); |
886 | path->dentry = saved; | ||
894 | return exp; | 887 | return exp; |
895 | } | 888 | } |
896 | 889 | ||
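exp_parent now mutates path->dentry in place while climbing, trading one reference per step via dget_parent/dput, and restores the caller's dentry before returning. The save/climb/restore shape with toy refcounted nodes, where the exported flag stands in for a successful exp_get_by_name:

    #include <assert.h>
    #include <stddef.h>

    struct node { struct node *parent; int refs; int exported; };

    struct node *nget(struct node *n) { n->refs++; return n; }
    void nput(struct node *n) { n->refs--; assert(n->refs >= 0); }

    struct node *find_export(struct node **pos)
    {
        struct node *saved = nget(*pos);  /* saved = dget(path->dentry)  */
        struct node *found = NULL;

        while (!(*pos)->exported && (*pos)->parent) {
            struct node *parent = nget((*pos)->parent); /* dget_parent() */
            nput(*pos);                                 /* dput(old)     */
            *pos = parent;
        }
        if ((*pos)->exported)
            found = *pos;
        nput(*pos);                       /* dput(path->dentry)          */
        *pos = saved;                     /* path->dentry = saved        */
        return found;
    }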
@@ -1018,7 +1011,7 @@ exp_export(struct nfsctl_export *nxp) | |||
1018 | goto out_put_clp; | 1011 | goto out_put_clp; |
1019 | err = -EINVAL; | 1012 | err = -EINVAL; |
1020 | 1013 | ||
1021 | exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL); | 1014 | exp = exp_get_by_name(clp, &path, NULL); |
1022 | 1015 | ||
1023 | memset(&new, 0, sizeof(new)); | 1016 | memset(&new, 0, sizeof(new)); |
1024 | 1017 | ||
@@ -1135,7 +1128,7 @@ exp_unexport(struct nfsctl_export *nxp) | |||
1135 | goto out_domain; | 1128 | goto out_domain; |
1136 | 1129 | ||
1137 | err = -EINVAL; | 1130 | err = -EINVAL; |
1138 | exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL); | 1131 | exp = exp_get_by_name(dom, &path, NULL); |
1139 | path_put(&path); | 1132 | path_put(&path); |
1140 | if (IS_ERR(exp)) | 1133 | if (IS_ERR(exp)) |
1141 | goto out_domain; | 1134 | goto out_domain; |
@@ -1177,7 +1170,7 @@ exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize) | |||
1177 | dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", | 1170 | dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", |
1178 | name, path.dentry, clp->name, | 1171 | name, path.dentry, clp->name, |
1179 | inode->i_sb->s_id, inode->i_ino); | 1172 | inode->i_sb->s_id, inode->i_ino); |
1180 | exp = exp_parent(clp, path.mnt, path.dentry, NULL); | 1173 | exp = exp_parent(clp, &path); |
1181 | if (IS_ERR(exp)) { | 1174 | if (IS_ERR(exp)) { |
1182 | err = PTR_ERR(exp); | 1175 | err = PTR_ERR(exp); |
1183 | goto out; | 1176 | goto out; |
@@ -1207,7 +1200,7 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, | |||
1207 | if (IS_ERR(ek)) | 1200 | if (IS_ERR(ek)) |
1208 | return ERR_CAST(ek); | 1201 | return ERR_CAST(ek); |
1209 | 1202 | ||
1210 | exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp); | 1203 | exp = exp_get_by_name(clp, &ek->ek_path, reqp); |
1211 | cache_put(&ek->h, &svc_expkey_cache); | 1204 | cache_put(&ek->h, &svc_expkey_cache); |
1212 | 1205 | ||
1213 | if (IS_ERR(exp)) | 1206 | if (IS_ERR(exp)) |
@@ -1247,8 +1240,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) | |||
1247 | * use exp_get_by_name() or exp_find(). | 1240 | * use exp_get_by_name() or exp_find(). |
1248 | */ | 1241 | */ |
1249 | struct svc_export * | 1242 | struct svc_export * |
1250 | rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, | 1243 | rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) |
1251 | struct dentry *dentry) | ||
1252 | { | 1244 | { |
1253 | struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); | 1245 | struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); |
1254 | 1246 | ||
@@ -1256,8 +1248,7 @@ rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, | |||
1256 | goto gss; | 1248 | goto gss; |
1257 | 1249 | ||
1258 | /* First try the auth_unix client: */ | 1250 | /* First try the auth_unix client: */ |
1259 | exp = exp_get_by_name(rqstp->rq_client, mnt, dentry, | 1251 | exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle); |
1260 | &rqstp->rq_chandle); | ||
1261 | if (PTR_ERR(exp) == -ENOENT) | 1252 | if (PTR_ERR(exp) == -ENOENT) |
1262 | goto gss; | 1253 | goto gss; |
1263 | if (IS_ERR(exp)) | 1254 | if (IS_ERR(exp)) |
@@ -1269,8 +1260,7 @@ gss: | |||
1269 | /* Otherwise, try falling back on gss client */ | 1260 | /* Otherwise, try falling back on gss client */ |
1270 | if (rqstp->rq_gssclient == NULL) | 1261 | if (rqstp->rq_gssclient == NULL) |
1271 | return exp; | 1262 | return exp; |
1272 | gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry, | 1263 | gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle); |
1273 | &rqstp->rq_chandle); | ||
1274 | if (PTR_ERR(gssexp) == -ENOENT) | 1264 | if (PTR_ERR(gssexp) == -ENOENT) |
1275 | return exp; | 1265 | return exp; |
1276 | if (!IS_ERR(exp)) | 1266 | if (!IS_ERR(exp)) |
@@ -1309,23 +1299,19 @@ gss: | |||
1309 | } | 1299 | } |
1310 | 1300 | ||
1311 | struct svc_export * | 1301 | struct svc_export * |
1312 | rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, | 1302 | rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) |
1313 | struct dentry *dentry) | ||
1314 | { | 1303 | { |
1315 | struct svc_export *exp; | 1304 | struct dentry *saved = dget(path->dentry); |
1316 | 1305 | struct svc_export *exp = rqst_exp_get_by_name(rqstp, path); | |
1317 | dget(dentry); | 1306 | |
1318 | exp = rqst_exp_get_by_name(rqstp, mnt, dentry); | 1307 | while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { |
1319 | 1308 | struct dentry *parent = dget_parent(path->dentry); | |
1320 | while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { | 1309 | dput(path->dentry); |
1321 | struct dentry *parent; | 1310 | path->dentry = parent; |
1322 | 1311 | exp = rqst_exp_get_by_name(rqstp, path); | |
1323 | parent = dget_parent(dentry); | ||
1324 | dput(dentry); | ||
1325 | dentry = parent; | ||
1326 | exp = rqst_exp_get_by_name(rqstp, mnt, dentry); | ||
1327 | } | 1312 | } |
1328 | dput(dentry); | 1313 | dput(path->dentry); |
1314 | path->dentry = saved; | ||
1329 | return exp; | 1315 | return exp; |
1330 | } | 1316 | } |
1331 | 1317 | ||
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b660435978d2..99f835753596 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <linux/security.h> | 55 | #include <linux/security.h> |
56 | #endif /* CONFIG_NFSD_V4 */ | 56 | #endif /* CONFIG_NFSD_V4 */ |
57 | #include <linux/jhash.h> | 57 | #include <linux/jhash.h> |
58 | #include <linux/ima.h> | ||
58 | 59 | ||
59 | #include <asm/uaccess.h> | 60 | #include <asm/uaccess.h> |
60 | 61 | ||
@@ -100,36 +101,35 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | |||
100 | { | 101 | { |
101 | struct svc_export *exp = *expp, *exp2 = NULL; | 102 | struct svc_export *exp = *expp, *exp2 = NULL; |
102 | struct dentry *dentry = *dpp; | 103 | struct dentry *dentry = *dpp; |
103 | struct vfsmount *mnt = mntget(exp->ex_path.mnt); | 104 | struct path path = {.mnt = mntget(exp->ex_path.mnt), |
104 | struct dentry *mounts = dget(dentry); | 105 | .dentry = dget(dentry)}; |
105 | int err = 0; | 106 | int err = 0; |
106 | 107 | ||
107 | while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); | 108 | while (d_mountpoint(path.dentry) && follow_down(&path)) |
109 | ; | ||
108 | 110 | ||
109 | exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); | 111 | exp2 = rqst_exp_get_by_name(rqstp, &path); |
110 | if (IS_ERR(exp2)) { | 112 | if (IS_ERR(exp2)) { |
111 | if (PTR_ERR(exp2) != -ENOENT) | 113 | if (PTR_ERR(exp2) != -ENOENT) |
112 | err = PTR_ERR(exp2); | 114 | err = PTR_ERR(exp2); |
113 | dput(mounts); | 115 | path_put(&path); |
114 | mntput(mnt); | ||
115 | goto out; | 116 | goto out; |
116 | } | 117 | } |
117 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { | 118 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { |
118 | /* successfully crossed mount point */ | 119 | /* successfully crossed mount point */ |
119 | /* | 120 | /* |
120 | * This is subtle: dentry is *not* under mnt at this point. | 121 | * This is subtle: path.dentry is *not* on path.mnt |
121 | * The only reason we are safe is that original mnt is pinned | 122 | * at this point. The only reason we are safe is that |
122 | * down by exp, so we should dput before putting exp. | 123 | * original mnt is pinned down by exp, so we should |
124 | * put path *before* putting exp | ||
123 | */ | 125 | */ |
124 | dput(dentry); | 126 | *dpp = path.dentry; |
125 | *dpp = mounts; | 127 | path.dentry = dentry; |
126 | exp_put(exp); | ||
127 | *expp = exp2; | 128 | *expp = exp2; |
128 | } else { | 129 | exp2 = exp; |
129 | exp_put(exp2); | ||
130 | dput(mounts); | ||
131 | } | 130 | } |
132 | mntput(mnt); | 131 | path_put(&path); |
132 | exp_put(exp2); | ||
133 | out: | 133 | out: |
134 | return err; | 134 | return err; |
135 | } | 135 | } |
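The rewritten crossing funnels success and failure through one cleanup tail by swapping pointers: on success the new dentry goes out to the caller, and the old dentry (and old export) are parked in the locals that path_put/exp_put release anyway. The idiom reduced to a toy allocator:

    #include <stdlib.h>

    int cross(char **held)
    {
        char *fresh = malloc(8);
        int ok = fresh != NULL;

        if (ok) {              /* successfully crossed the mount point */
            char *old = *held;
            *held = fresh;     /* caller keeps the new object          */
            fresh = old;       /* park old where the tail releases it  */
        }
        free(fresh);           /* single cleanup tail for both cases   */
        return ok ? 0 : -1;
    }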
@@ -168,28 +168,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
168 | /* checking mountpoint crossing is very different when stepping up */ | 168 | /* checking mountpoint crossing is very different when stepping up */ |
169 | struct svc_export *exp2 = NULL; | 169 | struct svc_export *exp2 = NULL; |
170 | struct dentry *dp; | 170 | struct dentry *dp; |
171 | struct vfsmount *mnt = mntget(exp->ex_path.mnt); | 171 | struct path path = {.mnt = mntget(exp->ex_path.mnt), |
172 | dentry = dget(dparent); | 172 | .dentry = dget(dparent)}; |
173 | while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) | 173 | |
174 | while (path.dentry == path.mnt->mnt_root && | ||
175 | follow_up(&path)) | ||
174 | ; | 176 | ; |
175 | dp = dget_parent(dentry); | 177 | dp = dget_parent(path.dentry); |
176 | dput(dentry); | 178 | dput(path.dentry); |
177 | dentry = dp; | 179 | path.dentry = dp; |
178 | 180 | ||
179 | exp2 = rqst_exp_parent(rqstp, mnt, dentry); | 181 | exp2 = rqst_exp_parent(rqstp, &path); |
180 | if (PTR_ERR(exp2) == -ENOENT) { | 182 | if (PTR_ERR(exp2) == -ENOENT) { |
181 | dput(dentry); | ||
182 | dentry = dget(dparent); | 183 | dentry = dget(dparent); |
183 | } else if (IS_ERR(exp2)) { | 184 | } else if (IS_ERR(exp2)) { |
184 | host_err = PTR_ERR(exp2); | 185 | host_err = PTR_ERR(exp2); |
185 | dput(dentry); | 186 | path_put(&path); |
186 | mntput(mnt); | ||
187 | goto out_nfserr; | 187 | goto out_nfserr; |
188 | } else { | 188 | } else { |
189 | dentry = dget(path.dentry); | ||
189 | exp_put(exp); | 190 | exp_put(exp); |
190 | exp = exp2; | 191 | exp = exp2; |
191 | } | 192 | } |
192 | mntput(mnt); | 193 | path_put(&path); |
193 | } | 194 | } |
194 | } else { | 195 | } else { |
195 | fh_lock(fhp); | 196 | fh_lock(fhp); |
@@ -735,6 +736,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
735 | flags, cred); | 736 | flags, cred); |
736 | if (IS_ERR(*filp)) | 737 | if (IS_ERR(*filp)) |
737 | host_err = PTR_ERR(*filp); | 738 | host_err = PTR_ERR(*filp); |
739 | else | ||
740 | ima_counts_get(*filp); | ||
738 | out_nfserr: | 741 | out_nfserr: |
739 | err = nfserrno(host_err); | 742 | err = nfserrno(host_err); |
740 | out: | 743 | out: |
@@ -2024,6 +2027,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
2024 | struct dentry *dentry, int acc) | 2027 | struct dentry *dentry, int acc) |
2025 | { | 2028 | { |
2026 | struct inode *inode = dentry->d_inode; | 2029 | struct inode *inode = dentry->d_inode; |
2030 | struct path path; | ||
2027 | int err; | 2031 | int err; |
2028 | 2032 | ||
2029 | if (acc == NFSD_MAY_NOP) | 2033 | if (acc == NFSD_MAY_NOP) |
@@ -2096,7 +2100,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
2096 | if (err == -EACCES && S_ISREG(inode->i_mode) && | 2100 | if (err == -EACCES && S_ISREG(inode->i_mode) && |
2097 | acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) | 2101 | acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) |
2098 | err = inode_permission(inode, MAY_EXEC); | 2102 | err = inode_permission(inode, MAY_EXEC); |
2103 | if (err) | ||
2104 | goto nfsd_out; | ||
2099 | 2105 | ||
2106 | /* Do integrity (permission) checking now, but defer incrementing | ||
2107 | * IMA counts to the actual file open. | ||
2108 | */ | ||
2109 | path.mnt = exp->ex_path.mnt; | ||
2110 | path.dentry = dentry; | ||
2111 | err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC), | ||
2112 | IMA_COUNT_LEAVE); | ||
2113 | nfsd_out: | ||
2100 | return err? nfserrno(err) : 0; | 2114 | return err? nfserrno(err) : 0; |
2101 | } | 2115 | } |
2102 | 2116 | ||
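nfsd now splits IMA handling in two: nfsd_permission checks with IMA_COUNT_LEAVE, deferring the counter bump, and nfsd_open commits it with ima_counts_get once a file is really opened, so counts stay balanced when a permission check never leads to an open. A toy two-phase check/commit counter (the constants mirror the diff's flags, the mechanics are simplified):

    #include <stdio.h>

    static int open_count;

    enum { COUNT_UPDATE, COUNT_LEAVE };

    /* Phase 1: the check may or may not commit the counter ... */
    int check(int mode_ok, int count_mode)
    {
        if (!mode_ok)
            return -1;
        if (count_mode == COUNT_UPDATE) /* may_open(): IMA_COUNT_UPDATE */
            open_count++;
        return 0;                       /* nfsd: IMA_COUNT_LEAVE, defer */
    }

    /* Phase 2: ... the actual open commits it (ima_counts_get). */
    void opened(void) { open_count++; }

    int main(void)
    {
        if (check(1, COUNT_LEAVE) == 0) /* nfsd_permission() */
            opened();                   /* nfsd_open()       */
        printf("open_count=%d\n", open_count);
        return 0;
    }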
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 300f1cdfa862..cadd36b14d07 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c | |||
@@ -864,11 +864,11 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) | |||
864 | case NILFS_CHECKPOINT: | 864 | case NILFS_CHECKPOINT: |
865 | /* | 865 | /* |
866 | * Check for protecting existing snapshot mounts: | 866 | * Check for protecting existing snapshot mounts: |
867 | * bd_mount_sem is used to make this operation atomic and | 867 | * ns_mount_mutex is used to make this operation atomic and |
868 | * exclusive with a new mount job. Though it doesn't cover | 868 | * exclusive with a new mount job. Though it doesn't cover |
869 | * umount, it's enough for the purpose. | 869 | * umount, it's enough for the purpose. |
870 | */ | 870 | */ |
871 | down(&nilfs->ns_bdev->bd_mount_sem); | 871 | mutex_lock(&nilfs->ns_mount_mutex); |
872 | if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { | 872 | if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { |
873 | /* Current implementation does not have to protect | 873 | /* Current implementation does not have to protect |
874 | plain read-only mounts since they are exclusive | 874 | plain read-only mounts since they are exclusive |
@@ -877,7 +877,7 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) | |||
877 | ret = -EBUSY; | 877 | ret = -EBUSY; |
878 | } else | 878 | } else |
879 | ret = nilfs_cpfile_clear_snapshot(cpfile, cno); | 879 | ret = nilfs_cpfile_clear_snapshot(cpfile, cno); |
880 | up(&nilfs->ns_bdev->bd_mount_sem); | 880 | mutex_unlock(&nilfs->ns_mount_mutex); |
881 | return ret; | 881 | return ret; |
882 | case NILFS_SNAPSHOT: | 882 | case NILFS_SNAPSHOT: |
883 | return nilfs_cpfile_set_snapshot(cpfile, cno); | 883 | return nilfs_cpfile_set_snapshot(cpfile, cno); |
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h index adccd4fc654e..0776ccc2504a 100644 --- a/fs/nilfs2/sb.h +++ b/fs/nilfs2/sb.h | |||
@@ -60,6 +60,7 @@ struct nilfs_sb_info { | |||
60 | struct super_block *s_super; /* reverse pointer to super_block */ | 60 | struct super_block *s_super; /* reverse pointer to super_block */ |
61 | struct the_nilfs *s_nilfs; | 61 | struct the_nilfs *s_nilfs; |
62 | struct list_head s_list; /* list head for nilfs->ns_supers */ | 62 | struct list_head s_list; /* list head for nilfs->ns_supers */ |
63 | atomic_t s_count; /* reference count */ | ||
63 | 64 | ||
64 | /* Segment constructor */ | 65 | /* Segment constructor */ |
65 | struct list_head s_dirty_files; /* dirty files list */ | 66 | struct list_head s_dirty_files; /* dirty files list */ |
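The new s_count makes nilfs_sb_info independently refcounted: nilfs_fill_super (below) initializes it to 1, and the kfree calls become nilfs_put_sbinfo. The put helper itself is not in this diff; the conventional shape it presumably has is drop-and-free-on-last-reference:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct toy_sbinfo { atomic_int s_count; /* ... other fields ... */ };

    struct toy_sbinfo *new_sbinfo(void)
    {
        struct toy_sbinfo *sbi = malloc(sizeof(*sbi));
        if (sbi)
            atomic_init(&sbi->s_count, 1); /* atomic_set(&sbi->s_count, 1) */
        return sbi;
    }

    /* Presumed shape of nilfs_put_sbinfo(): free on the last reference. */
    void put_sbinfo(struct toy_sbinfo *sbi)
    {
        if (atomic_fetch_sub(&sbi->s_count, 1) == 1)
            free(sbi);                     /* kfree(sbi) in the kernel */
    }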
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6989b03e97ab..1777a3467bd2 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -65,9 +65,8 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " | |||
65 | "(NILFS)"); | 65 | "(NILFS)"); |
66 | MODULE_LICENSE("GPL"); | 66 | MODULE_LICENSE("GPL"); |
67 | 67 | ||
68 | static void nilfs_write_super(struct super_block *sb); | ||
68 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); | 69 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); |
69 | static int test_exclusive_mount(struct file_system_type *fs_type, | ||
70 | struct block_device *bdev, int flags); | ||
71 | 70 | ||
72 | /** | 71 | /** |
73 | * nilfs_error() - report failure condition on a filesystem | 72 | * nilfs_error() - report failure condition on a filesystem |
@@ -315,6 +314,11 @@ static void nilfs_put_super(struct super_block *sb) | |||
315 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 314 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
316 | struct the_nilfs *nilfs = sbi->s_nilfs; | 315 | struct the_nilfs *nilfs = sbi->s_nilfs; |
317 | 316 | ||
317 | lock_kernel(); | ||
318 | |||
319 | if (sb->s_dirt) | ||
320 | nilfs_write_super(sb); | ||
321 | |||
318 | nilfs_detach_segment_constructor(sbi); | 322 | nilfs_detach_segment_constructor(sbi); |
319 | 323 | ||
320 | if (!(sb->s_flags & MS_RDONLY)) { | 324 | if (!(sb->s_flags & MS_RDONLY)) { |
@@ -323,12 +327,18 @@ static void nilfs_put_super(struct super_block *sb) | |||
323 | nilfs_commit_super(sbi, 1); | 327 | nilfs_commit_super(sbi, 1); |
324 | up_write(&nilfs->ns_sem); | 328 | up_write(&nilfs->ns_sem); |
325 | } | 329 | } |
330 | down_write(&nilfs->ns_super_sem); | ||
331 | if (nilfs->ns_current == sbi) | ||
332 | nilfs->ns_current = NULL; | ||
333 | up_write(&nilfs->ns_super_sem); | ||
326 | 334 | ||
327 | nilfs_detach_checkpoint(sbi); | 335 | nilfs_detach_checkpoint(sbi); |
328 | put_nilfs(sbi->s_nilfs); | 336 | put_nilfs(sbi->s_nilfs); |
329 | sbi->s_super = NULL; | 337 | sbi->s_super = NULL; |
330 | sb->s_fs_info = NULL; | 338 | sb->s_fs_info = NULL; |
331 | kfree(sbi); | 339 | nilfs_put_sbinfo(sbi); |
340 | |||
341 | unlock_kernel(); | ||
332 | } | 342 | } |
333 | 343 | ||
334 | /** | 344 | /** |
@@ -383,6 +393,8 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) | |||
383 | { | 393 | { |
384 | int err = 0; | 394 | int err = 0; |
385 | 395 | ||
396 | nilfs_write_super(sb); | ||
397 | |||
386 | /* This function is called when super block should be written back */ | 398 | /* This function is called when super block should be written back */ |
387 | if (wait) | 399 | if (wait) |
388 | err = nilfs_construct_segment(sb); | 400 | err = nilfs_construct_segment(sb); |
@@ -396,9 +408,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
396 | struct buffer_head *bh_cp; | 408 | struct buffer_head *bh_cp; |
397 | int err; | 409 | int err; |
398 | 410 | ||
399 | down_write(&nilfs->ns_sem); | 411 | down_write(&nilfs->ns_super_sem); |
400 | list_add(&sbi->s_list, &nilfs->ns_supers); | 412 | list_add(&sbi->s_list, &nilfs->ns_supers); |
401 | up_write(&nilfs->ns_sem); | 413 | up_write(&nilfs->ns_super_sem); |
402 | 414 | ||
403 | sbi->s_ifile = nilfs_mdt_new( | 415 | sbi->s_ifile = nilfs_mdt_new( |
404 | nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); | 416 | nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); |
@@ -436,9 +448,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
436 | nilfs_mdt_destroy(sbi->s_ifile); | 448 | nilfs_mdt_destroy(sbi->s_ifile); |
437 | sbi->s_ifile = NULL; | 449 | sbi->s_ifile = NULL; |
438 | 450 | ||
439 | down_write(&nilfs->ns_sem); | 451 | down_write(&nilfs->ns_super_sem); |
440 | list_del_init(&sbi->s_list); | 452 | list_del_init(&sbi->s_list); |
441 | up_write(&nilfs->ns_sem); | 453 | up_write(&nilfs->ns_super_sem); |
442 | 454 | ||
443 | return err; | 455 | return err; |
444 | } | 456 | } |
@@ -450,9 +462,9 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) | |||
450 | nilfs_mdt_clear(sbi->s_ifile); | 462 | nilfs_mdt_clear(sbi->s_ifile); |
451 | nilfs_mdt_destroy(sbi->s_ifile); | 463 | nilfs_mdt_destroy(sbi->s_ifile); |
452 | sbi->s_ifile = NULL; | 464 | sbi->s_ifile = NULL; |
453 | down_write(&nilfs->ns_sem); | 465 | down_write(&nilfs->ns_super_sem); |
454 | list_del_init(&sbi->s_list); | 466 | list_del_init(&sbi->s_list); |
455 | up_write(&nilfs->ns_sem); | 467 | up_write(&nilfs->ns_super_sem); |
456 | } | 468 | } |
457 | 469 | ||
458 | static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) | 470 | static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) |
@@ -752,7 +764,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, | |||
752 | * @silent: silent mode flag | 764 | * @silent: silent mode flag |
753 | * @nilfs: the_nilfs struct | 765 | * @nilfs: the_nilfs struct |
754 | * | 766 | * |
755 | * This function is called exclusively by bd_mount_mutex. | 767 | * This function is called exclusively by nilfs->ns_mount_mutex. |
756 | * So, the recovery process is protected from other simultaneous mounts. | 768 | * So, the recovery process is protected from other simultaneous mounts. |
757 | */ | 769 | */ |
758 | static int | 770 | static int |
@@ -773,6 +785,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, | |||
773 | get_nilfs(nilfs); | 785 | get_nilfs(nilfs); |
774 | sbi->s_nilfs = nilfs; | 786 | sbi->s_nilfs = nilfs; |
775 | sbi->s_super = sb; | 787 | sbi->s_super = sb; |
788 | atomic_set(&sbi->s_count, 1); | ||
776 | 789 | ||
777 | err = init_nilfs(nilfs, sbi, (char *)data); | 790 | err = init_nilfs(nilfs, sbi, (char *)data); |
778 | if (err) | 791 | if (err) |
@@ -870,6 +883,11 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, | |||
870 | goto failed_root; | 883 | goto failed_root; |
871 | } | 884 | } |
872 | 885 | ||
886 | down_write(&nilfs->ns_super_sem); | ||
887 | if (!nilfs_test_opt(sbi, SNAPSHOT)) | ||
888 | nilfs->ns_current = sbi; | ||
889 | up_write(&nilfs->ns_super_sem); | ||
890 | |||
873 | return 0; | 891 | return 0; |
874 | 892 | ||
875 | failed_root: | 893 | failed_root: |
@@ -885,7 +903,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, | |||
885 | failed_sbi: | 903 | failed_sbi: |
886 | put_nilfs(nilfs); | 904 | put_nilfs(nilfs); |
887 | sb->s_fs_info = NULL; | 905 | sb->s_fs_info = NULL; |
888 | kfree(sbi); | 906 | nilfs_put_sbinfo(sbi); |
889 | return err; | 907 | return err; |
890 | } | 908 | } |
891 | 909 | ||
@@ -898,6 +916,9 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
898 | struct nilfs_mount_options old_opts; | 916 | struct nilfs_mount_options old_opts; |
899 | int err; | 917 | int err; |
900 | 918 | ||
919 | lock_kernel(); | ||
920 | |||
921 | down_write(&nilfs->ns_super_sem); | ||
901 | old_sb_flags = sb->s_flags; | 922 | old_sb_flags = sb->s_flags; |
902 | old_opts.mount_opt = sbi->s_mount_opt; | 923 | old_opts.mount_opt = sbi->s_mount_opt; |
903 | old_opts.snapshot_cno = sbi->s_snapshot_cno; | 924 | old_opts.snapshot_cno = sbi->s_snapshot_cno; |
@@ -945,14 +966,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
945 | * store the current valid flag. (It may have been changed | 966 | * store the current valid flag. (It may have been changed |
946 | * by fsck since we originally mounted the partition.) | 967 | * by fsck since we originally mounted the partition.) |
947 | */ | 968 | */ |
948 | down(&sb->s_bdev->bd_mount_sem); | 969 | if (nilfs->ns_current && nilfs->ns_current != sbi) { |
949 | /* Check existing RW-mount */ | ||
950 | if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) { | ||
951 | printk(KERN_WARNING "NILFS (device %s): couldn't " | 970 | printk(KERN_WARNING "NILFS (device %s): couldn't " |
952 | "remount because a RW-mount exists.\n", | 971 | "remount because an RW-mount exists.\n", |
953 | sb->s_id); | 972 | sb->s_id); |
954 | err = -EBUSY; | 973 | err = -EBUSY; |
955 | goto rw_remount_failed; | 974 | goto restore_opts; |
956 | } | 975 | } |
957 | if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { | 976 | if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { |
958 | printk(KERN_WARNING "NILFS (device %s): couldn't " | 977 | printk(KERN_WARNING "NILFS (device %s): couldn't " |
@@ -960,7 +979,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
960 | "the latest one.\n", | 979 | "the latest one.\n", |
961 | sb->s_id); | 980 | sb->s_id); |
962 | err = -EINVAL; | 981 | err = -EINVAL; |
963 | goto rw_remount_failed; | 982 | goto restore_opts; |
964 | } | 983 | } |
965 | sb->s_flags &= ~MS_RDONLY; | 984 | sb->s_flags &= ~MS_RDONLY; |
966 | nilfs_clear_opt(sbi, SNAPSHOT); | 985 | nilfs_clear_opt(sbi, SNAPSHOT); |
@@ -968,28 +987,31 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
968 | 987 | ||
969 | err = nilfs_attach_segment_constructor(sbi); | 988 | err = nilfs_attach_segment_constructor(sbi); |
970 | if (err) | 989 | if (err) |
971 | goto rw_remount_failed; | 990 | goto restore_opts; |
972 | 991 | ||
973 | down_write(&nilfs->ns_sem); | 992 | down_write(&nilfs->ns_sem); |
974 | nilfs_setup_super(sbi); | 993 | nilfs_setup_super(sbi); |
975 | up_write(&nilfs->ns_sem); | 994 | up_write(&nilfs->ns_sem); |
976 | 995 | ||
977 | up(&sb->s_bdev->bd_mount_sem); | 996 | nilfs->ns_current = sbi; |
978 | } | 997 | } |
979 | out: | 998 | out: |
999 | up_write(&nilfs->ns_super_sem); | ||
1000 | unlock_kernel(); | ||
980 | return 0; | 1001 | return 0; |
981 | 1002 | ||
982 | rw_remount_failed: | ||
983 | up(&sb->s_bdev->bd_mount_sem); | ||
984 | restore_opts: | 1003 | restore_opts: |
985 | sb->s_flags = old_sb_flags; | 1004 | sb->s_flags = old_sb_flags; |
986 | sbi->s_mount_opt = old_opts.mount_opt; | 1005 | sbi->s_mount_opt = old_opts.mount_opt; |
987 | sbi->s_snapshot_cno = old_opts.snapshot_cno; | 1006 | sbi->s_snapshot_cno = old_opts.snapshot_cno; |
1007 | up_write(&nilfs->ns_super_sem); | ||
1008 | unlock_kernel(); | ||
988 | return err; | 1009 | return err; |
989 | } | 1010 | } |
990 | 1011 | ||
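The remount path replaces the old bd_mount_sem probe with a direct check of nilfs->ns_current under ns_super_sem: snapshot mounts coexist freely, but at most one rw/ro mount is "current", so remounting r/w fails -EBUSY when another sbi holds that slot. The policy check in miniature (lock initialization elided, names hypothetical):

    #include <pthread.h>

    struct toy_nilfs {
        pthread_rwlock_t super_sem;   /* ns_super_sem analogue */
        void *current_sbi;            /* ns_current analogue   */
    };

    /* Remount r/w only if nobody else is the "current" mount. */
    int remount_rw(struct toy_nilfs *n, void *sbi)
    {
        int err = 0;
        pthread_rwlock_wrlock(&n->super_sem);
        if (n->current_sbi && n->current_sbi != sbi)
            err = -16;                /* -EBUSY: an RW-mount exists */
        else
            n->current_sbi = sbi;     /* nilfs->ns_current = sbi    */
        pthread_rwlock_unlock(&n->super_sem);
        return err;
    }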
991 | struct nilfs_super_data { | 1012 | struct nilfs_super_data { |
992 | struct block_device *bdev; | 1013 | struct block_device *bdev; |
1014 | struct nilfs_sb_info *sbi; | ||
993 | __u64 cno; | 1015 | __u64 cno; |
994 | int flags; | 1016 | int flags; |
995 | }; | 1017 | }; |
@@ -1048,33 +1070,7 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data) | |||
1048 | { | 1070 | { |
1049 | struct nilfs_super_data *sd = data; | 1071 | struct nilfs_super_data *sd = data; |
1050 | 1072 | ||
1051 | return s->s_bdev == sd->bdev; | 1073 | return sd->sbi && s->s_fs_info == (void *)sd->sbi; |
1052 | } | ||
1053 | |||
1054 | static int nilfs_test_bdev_super2(struct super_block *s, void *data) | ||
1055 | { | ||
1056 | struct nilfs_super_data *sd = data; | ||
1057 | int ret; | ||
1058 | |||
1059 | if (s->s_bdev != sd->bdev) | ||
1060 | return 0; | ||
1061 | |||
1062 | if (!((s->s_flags | sd->flags) & MS_RDONLY)) | ||
1063 | return 1; /* Reuse an old R/W-mode super_block */ | ||
1064 | |||
1065 | if (s->s_flags & sd->flags & MS_RDONLY) { | ||
1066 | if (down_read_trylock(&s->s_umount)) { | ||
1067 | ret = s->s_root && | ||
1068 | (sd->cno == NILFS_SB(s)->s_snapshot_cno); | ||
1069 | up_read(&s->s_umount); | ||
1070 | /* | ||
1071 | * This path is locked with sb_lock by sget(). | ||
1072 | * So, drop_super() causes deadlock. | ||
1073 | */ | ||
1074 | return ret; | ||
1075 | } | ||
1076 | } | ||
1077 | return 0; | ||
1078 | } | 1074 | } |
1079 | 1075 | ||
1080 | static int | 1076 | static int |
@@ -1082,8 +1078,8 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1082 | const char *dev_name, void *data, struct vfsmount *mnt) | 1078 | const char *dev_name, void *data, struct vfsmount *mnt) |
1083 | { | 1079 | { |
1084 | struct nilfs_super_data sd; | 1080 | struct nilfs_super_data sd; |
1085 | struct super_block *s, *s2; | 1081 | struct super_block *s; |
1086 | struct the_nilfs *nilfs = NULL; | 1082 | struct the_nilfs *nilfs; |
1087 | int err, need_to_close = 1; | 1083 | int err, need_to_close = 1; |
1088 | 1084 | ||
1089 | sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); | 1085 | sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); |
@@ -1095,7 +1091,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1095 | * much more information than normal filesystems to identify mount | 1091 | * much more information than normal filesystems to identify mount |
1096 | * instance. For snapshot mounts, not only a mount type (ro-mount | 1092 | * instance. For snapshot mounts, not only a mount type (ro-mount |
1097 | * or rw-mount) but also a checkpoint number is required. | 1093 | * or rw-mount) but also a checkpoint number is required. |
1098 | * The results are passed in sget() using nilfs_super_data. | ||
1099 | */ | 1094 | */ |
1100 | sd.cno = 0; | 1095 | sd.cno = 0; |
1101 | sd.flags = flags; | 1096 | sd.flags = flags; |
@@ -1104,64 +1099,59 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1104 | goto failed; | 1099 | goto failed; |
1105 | } | 1100 | } |
1106 | 1101 | ||
1107 | /* | 1102 | nilfs = find_or_create_nilfs(sd.bdev); |
1108 | * once the super is inserted into the list by sget, s_umount | 1103 | if (!nilfs) { |
1109 | * will protect the lockfs code from trying to start a snapshot | 1104 | err = -ENOMEM; |
1110 | * while we are mounting | 1105 | goto failed; |
1111 | */ | ||
1112 | down(&sd.bdev->bd_mount_sem); | ||
1113 | if (!sd.cno && | ||
1114 | (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) { | ||
1115 | err = (err < 0) ? : -EBUSY; | ||
1116 | goto failed_unlock; | ||
1117 | } | 1106 | } |
1118 | 1107 | ||
1119 | /* | 1108 | mutex_lock(&nilfs->ns_mount_mutex); |
1120 | * Phase-1: search any existent instance and get the_nilfs | ||
1121 | */ | ||
1122 | s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); | ||
1123 | if (IS_ERR(s)) | ||
1124 | goto error_s; | ||
1125 | |||
1126 | if (!s->s_root) { | ||
1127 | err = -ENOMEM; | ||
1128 | nilfs = alloc_nilfs(sd.bdev); | ||
1129 | if (!nilfs) | ||
1130 | goto cancel_new; | ||
1131 | } else { | ||
1132 | struct nilfs_sb_info *sbi = NILFS_SB(s); | ||
1133 | 1109 | ||
1110 | if (!sd.cno) { | ||
1134 | /* | 1111 | /* |
1135 | * s_umount protects super_block from unmount process; | 1112 | * Check if an exclusive mount exists or not. |
1136 | * It covers pointers of nilfs_sb_info and the_nilfs. | 1113 | * Snapshot mounts coexist with a current mount |
1114 | * (i.e. rw-mount or ro-mount), whereas rw-mount and | ||
1115 | * ro-mount are mutually exclusive. | ||
1137 | */ | 1116 | */ |
1138 | nilfs = sbi->s_nilfs; | 1117 | down_read(&nilfs->ns_super_sem); |
1139 | get_nilfs(nilfs); | 1118 | if (nilfs->ns_current && |
1140 | up_write(&s->s_umount); | 1119 | ((nilfs->ns_current->s_super->s_flags ^ flags) |
1120 | & MS_RDONLY)) { | ||
1121 | up_read(&nilfs->ns_super_sem); | ||
1122 | err = -EBUSY; | ||
1123 | goto failed_unlock; | ||
1124 | } | ||
1125 | up_read(&nilfs->ns_super_sem); | ||
1126 | } | ||
1141 | 1127 | ||
1142 | /* | 1128 | /* |
1143 | * Phase-2: search specified snapshot or R/W mode super_block | 1129 | * Find existing nilfs_sb_info struct |
1144 | */ | 1130 | */ |
1145 | if (!sd.cno) | 1131 | sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); |
1146 | /* trying to get the latest checkpoint. */ | ||
1147 | sd.cno = nilfs_last_cno(nilfs); | ||
1148 | 1132 | ||
1149 | s2 = sget(fs_type, nilfs_test_bdev_super2, | 1133 | if (!sd.cno) |
1150 | nilfs_set_bdev_super, &sd); | 1134 | /* trying to get the latest checkpoint. */ |
1151 | deactivate_super(s); | 1135 | sd.cno = nilfs_last_cno(nilfs); |
1152 | /* | 1136 | |
1153 | * Although deactivate_super() invokes close_bdev_exclusive() at | 1137 | /* |
1154 | * kill_block_super(). Here, s is an existent mount; we need | 1138 | * Get super block instance holding the nilfs_sb_info struct. |
1155 | * one more close_bdev_exclusive() call. | 1139 | * A new instance is allocated if no existing mount is present or |
1156 | */ | 1140 | * existing instance has been unmounted. |
1157 | s = s2; | 1141 | */ |
1158 | if (IS_ERR(s)) | 1142 | s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); |
1159 | goto error_s; | 1143 | if (sd.sbi) |
1144 | nilfs_put_sbinfo(sd.sbi); | ||
1145 | |||
1146 | if (IS_ERR(s)) { | ||
1147 | err = PTR_ERR(s); | ||
1148 | goto failed_unlock; | ||
1160 | } | 1149 | } |
1161 | 1150 | ||
1162 | if (!s->s_root) { | 1151 | if (!s->s_root) { |
1163 | char b[BDEVNAME_SIZE]; | 1152 | char b[BDEVNAME_SIZE]; |
1164 | 1153 | ||
1154 | /* New superblock instance created */ | ||
1165 | s->s_flags = flags; | 1155 | s->s_flags = flags; |
1166 | strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); | 1156 | strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); |
1167 | sb_set_blocksize(s, block_size(sd.bdev)); | 1157 | sb_set_blocksize(s, block_size(sd.bdev)); |
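Note: the rw/ro exclusivity rule described in the comment earlier in this hunk comes down to a single bit test. A minimal sketch, with a hypothetical helper name that is not part of the patch:

    /* rw-mount vs ro-mount exclusion: two mounts conflict iff their
     * MS_RDONLY bits differ, which XOR exposes directly. */
    static int nilfs_mounts_conflict(unsigned long cur_flags,
                                     unsigned long new_flags)
    {
            return (cur_flags ^ new_flags) & MS_RDONLY;
    }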
@@ -1172,26 +1162,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1172 | 1162 | ||
1173 | s->s_flags |= MS_ACTIVE; | 1163 | s->s_flags |= MS_ACTIVE; |
1174 | need_to_close = 0; | 1164 | need_to_close = 0; |
1175 | } else if (!(s->s_flags & MS_RDONLY)) { | ||
1176 | err = -EBUSY; | ||
1177 | } | 1165 | } |
1178 | 1166 | ||
1179 | up(&sd.bdev->bd_mount_sem); | 1167 | mutex_unlock(&nilfs->ns_mount_mutex); |
1180 | put_nilfs(nilfs); | 1168 | put_nilfs(nilfs); |
1181 | if (need_to_close) | 1169 | if (need_to_close) |
1182 | close_bdev_exclusive(sd.bdev, flags); | 1170 | close_bdev_exclusive(sd.bdev, flags); |
1183 | simple_set_mnt(mnt, s); | 1171 | simple_set_mnt(mnt, s); |
1184 | return 0; | 1172 | return 0; |
1185 | 1173 | ||
1186 | error_s: | ||
1187 | up(&sd.bdev->bd_mount_sem); | ||
1188 | if (nilfs) | ||
1189 | put_nilfs(nilfs); | ||
1190 | close_bdev_exclusive(sd.bdev, flags); | ||
1191 | return PTR_ERR(s); | ||
1192 | |||
1193 | failed_unlock: | 1174 | failed_unlock: |
1194 | up(&sd.bdev->bd_mount_sem); | 1175 | mutex_unlock(&nilfs->ns_mount_mutex); |
1176 | put_nilfs(nilfs); | ||
1195 | failed: | 1177 | failed: |
1196 | close_bdev_exclusive(sd.bdev, flags); | 1178 | close_bdev_exclusive(sd.bdev, flags); |
1197 | 1179 | ||
@@ -1199,70 +1181,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1199 | 1181 | ||
1200 | cancel_new: | 1182 | cancel_new: |
1201 | /* Abandoning the newly allocated superblock */ | 1183 | /* Abandoning the newly allocated superblock */ |
1202 | up(&sd.bdev->bd_mount_sem); | 1184 | mutex_unlock(&nilfs->ns_mount_mutex); |
1203 | if (nilfs) | 1185 | put_nilfs(nilfs); |
1204 | put_nilfs(nilfs); | ||
1205 | up_write(&s->s_umount); | 1186 | up_write(&s->s_umount); |
1206 | deactivate_super(s); | 1187 | deactivate_super(s); |
1207 | /* | 1188 | /* |
1208 | * deactivate_super() invokes close_bdev_exclusive(). | 1189 | * deactivate_super() invokes close_bdev_exclusive(). |
1209 | * We must finish all post-cleaning before this call; | 1190 | * We must finish all post-cleaning before this call; |
1210 | * put_nilfs() and unlocking bd_mount_sem need the block device. | 1191 | * put_nilfs() needs the block device. |
1211 | */ | 1192 | */ |
1212 | return err; | 1193 | return err; |
1213 | } | 1194 | } |
1214 | 1195 | ||
1215 | static int nilfs_test_bdev_super3(struct super_block *s, void *data) | ||
1216 | { | ||
1217 | struct nilfs_super_data *sd = data; | ||
1218 | int ret; | ||
1219 | |||
1220 | if (s->s_bdev != sd->bdev) | ||
1221 | return 0; | ||
1222 | if (down_read_trylock(&s->s_umount)) { | ||
1223 | ret = (s->s_flags & MS_RDONLY) && s->s_root && | ||
1224 | nilfs_test_opt(NILFS_SB(s), SNAPSHOT); | ||
1225 | up_read(&s->s_umount); | ||
1226 | if (ret) | ||
1227 | return 0; /* ignore snapshot mounts */ | ||
1228 | } | ||
1229 | return !((sd->flags ^ s->s_flags) & MS_RDONLY); | ||
1230 | } | ||
1231 | |||
1232 | static int __false_bdev_super(struct super_block *s, void *data) | ||
1233 | { | ||
1234 | #if 0 /* XXX: workaround for lock debug. This is not good idea */ | ||
1235 | up_write(&s->s_umount); | ||
1236 | #endif | ||
1237 | return -EFAULT; | ||
1238 | } | ||
1239 | |||
1240 | /** | ||
1241 | * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not. | ||
1242 | * fs_type: filesystem type | ||
1243 | * bdev: block device | ||
1244 | * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount) | ||
1245 | * res: pointer to an integer to store result | ||
1246 | * | ||
1247 | * This function must be called within a section protected by bd_mount_mutex. | ||
1248 | */ | ||
1249 | static int test_exclusive_mount(struct file_system_type *fs_type, | ||
1250 | struct block_device *bdev, int flags) | ||
1251 | { | ||
1252 | struct super_block *s; | ||
1253 | struct nilfs_super_data sd = { .flags = flags, .bdev = bdev }; | ||
1254 | |||
1255 | s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd); | ||
1256 | if (IS_ERR(s)) { | ||
1257 | if (PTR_ERR(s) != -EFAULT) | ||
1258 | return PTR_ERR(s); | ||
1259 | return 0; /* Not found */ | ||
1260 | } | ||
1261 | up_write(&s->s_umount); | ||
1262 | deactivate_super(s); | ||
1263 | return 1; /* Found */ | ||
1264 | } | ||
1265 | |||
1266 | struct file_system_type nilfs_fs_type = { | 1196 | struct file_system_type nilfs_fs_type = { |
1267 | .owner = THIS_MODULE, | 1197 | .owner = THIS_MODULE, |
1268 | .name = "nilfs2", | 1198 | .name = "nilfs2", |
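For readers following the sget() call above: sget() walks the file_system_type's superblock list, offering each entry to a test callback, and runs a set callback on a freshly allocated super_block when nothing matches. A hedged sketch of the callback shapes (names are illustrative; the real nilfs_test_bdev_super() additionally matches on the sd.sbi found by nilfs_find_sbinfo()):

    static int example_test_bdev_super(struct super_block *s, void *data)
    {
            struct nilfs_super_data *sd = data;

            /* reuse this super_block only if it sits on the same device */
            return s->s_bdev == sd->bdev;
    }

    static int example_set_bdev_super(struct super_block *s, void *data)
    {
            struct nilfs_super_data *sd = data;

            s->s_bdev = sd->bdev;   /* stamp the newly allocated sb */
            return 0;
    }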
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 7f65b3be4aa9..e4e5c78bcc93 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -35,6 +35,10 @@ | |||
35 | #include "seglist.h" | 35 | #include "seglist.h" |
36 | #include "segbuf.h" | 36 | #include "segbuf.h" |
37 | 37 | ||
38 | |||
39 | static LIST_HEAD(nilfs_objects); | ||
40 | static DEFINE_SPINLOCK(nilfs_lock); | ||
41 | |||
38 | void nilfs_set_last_segment(struct the_nilfs *nilfs, | 42 | void nilfs_set_last_segment(struct the_nilfs *nilfs, |
39 | sector_t start_blocknr, u64 seq, __u64 cno) | 43 | sector_t start_blocknr, u64 seq, __u64 cno) |
40 | { | 44 | { |
@@ -55,7 +59,7 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, | |||
55 | * Return Value: On success, pointer to the_nilfs is returned. | 59 | * Return Value: On success, pointer to the_nilfs is returned. |
56 | * On error, NULL is returned. | 60 | * On error, NULL is returned. |
57 | */ | 61 | */ |
58 | struct the_nilfs *alloc_nilfs(struct block_device *bdev) | 62 | static struct the_nilfs *alloc_nilfs(struct block_device *bdev) |
59 | { | 63 | { |
60 | struct the_nilfs *nilfs; | 64 | struct the_nilfs *nilfs; |
61 | 65 | ||
@@ -68,7 +72,10 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) | |||
68 | atomic_set(&nilfs->ns_writer_refcount, -1); | 72 | atomic_set(&nilfs->ns_writer_refcount, -1); |
69 | atomic_set(&nilfs->ns_ndirtyblks, 0); | 73 | atomic_set(&nilfs->ns_ndirtyblks, 0); |
70 | init_rwsem(&nilfs->ns_sem); | 74 | init_rwsem(&nilfs->ns_sem); |
75 | init_rwsem(&nilfs->ns_super_sem); | ||
76 | mutex_init(&nilfs->ns_mount_mutex); | ||
71 | mutex_init(&nilfs->ns_writer_mutex); | 77 | mutex_init(&nilfs->ns_writer_mutex); |
78 | INIT_LIST_HEAD(&nilfs->ns_list); | ||
72 | INIT_LIST_HEAD(&nilfs->ns_supers); | 79 | INIT_LIST_HEAD(&nilfs->ns_supers); |
73 | spin_lock_init(&nilfs->ns_last_segment_lock); | 80 | spin_lock_init(&nilfs->ns_last_segment_lock); |
74 | nilfs->ns_gc_inodes_h = NULL; | 81 | nilfs->ns_gc_inodes_h = NULL; |
@@ -78,6 +85,45 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) | |||
78 | } | 85 | } |
79 | 86 | ||
80 | /** | 87 | /** |
88 | * find_or_create_nilfs - find or create nilfs object | ||
89 | * @bdev: block device to which the_nilfs is related | ||
90 | * | ||
 91 | * find_or_create_nilfs() looks up an existing nilfs object created on the | ||
 92 | * device and takes a reference on it. If no nilfs object | ||
93 | * is found on the device, a new nilfs object is allocated. | ||
94 | * | ||
95 | * Return Value: On success, pointer to the nilfs object is returned. | ||
96 | * On error, NULL is returned. | ||
97 | */ | ||
98 | struct the_nilfs *find_or_create_nilfs(struct block_device *bdev) | ||
99 | { | ||
100 | struct the_nilfs *nilfs, *new = NULL; | ||
101 | |||
102 | retry: | ||
103 | spin_lock(&nilfs_lock); | ||
104 | list_for_each_entry(nilfs, &nilfs_objects, ns_list) { | ||
105 | if (nilfs->ns_bdev == bdev) { | ||
106 | get_nilfs(nilfs); | ||
107 | spin_unlock(&nilfs_lock); | ||
108 | if (new) | ||
109 | put_nilfs(new); | ||
110 | return nilfs; /* existing object */ | ||
111 | } | ||
112 | } | ||
113 | if (new) { | ||
114 | list_add_tail(&new->ns_list, &nilfs_objects); | ||
115 | spin_unlock(&nilfs_lock); | ||
116 | return new; /* new object */ | ||
117 | } | ||
118 | spin_unlock(&nilfs_lock); | ||
119 | |||
120 | new = alloc_nilfs(bdev); | ||
121 | if (new) | ||
122 | goto retry; | ||
123 | return NULL; /* insufficient memory */ | ||
124 | } | ||
125 | |||
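find_or_create_nilfs() returns a referenced object either way; the allocation runs with nilfs_lock dropped (alloc_nilfs() may sleep) and the lookup is retried, which is why a second allocation that loses the race is released with put_nilfs(). A hypothetical caller, mirroring nilfs_get_sb() earlier in this diff:

    struct the_nilfs *nilfs = find_or_create_nilfs(bdev);

    if (!nilfs)
            return -ENOMEM;
    mutex_lock(&nilfs->ns_mount_mutex);
    /* ... per-device mount work serialized here ... */
    mutex_unlock(&nilfs->ns_mount_mutex);
    put_nilfs(nilfs);       /* drop the reference taken by the lookup */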
126 | /** | ||
81 | * put_nilfs - release a reference to the_nilfs | 127 | * put_nilfs - release a reference to the_nilfs |
82 | * @nilfs: the_nilfs structure to be released | 128 | * @nilfs: the_nilfs structure to be released |
83 | * | 129 | * |
@@ -86,13 +132,20 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) | |||
86 | */ | 132 | */ |
87 | void put_nilfs(struct the_nilfs *nilfs) | 133 | void put_nilfs(struct the_nilfs *nilfs) |
88 | { | 134 | { |
89 | if (!atomic_dec_and_test(&nilfs->ns_count)) | 135 | spin_lock(&nilfs_lock); |
136 | if (!atomic_dec_and_test(&nilfs->ns_count)) { | ||
137 | spin_unlock(&nilfs_lock); | ||
90 | return; | 138 | return; |
139 | } | ||
140 | list_del_init(&nilfs->ns_list); | ||
141 | spin_unlock(&nilfs_lock); | ||
142 | |||
91 | /* | 143 | /* |
92 | * Increment of ns_count never occur below because the caller | 144 | * Increment of ns_count never occurs below because the caller |
93 | * of get_nilfs() holds at least one reference to the_nilfs. | 145 | * of get_nilfs() holds at least one reference to the_nilfs. |
94 | * Thus its exclusion control is not required here. | 146 | * Thus its exclusion control is not required here. |
95 | */ | 147 | */ |
148 | |||
96 | might_sleep(); | 149 | might_sleep(); |
97 | if (nilfs_loaded(nilfs)) { | 150 | if (nilfs_loaded(nilfs)) { |
98 | nilfs_mdt_clear(nilfs->ns_sufile); | 151 | nilfs_mdt_clear(nilfs->ns_sufile); |
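Taking nilfs_lock unconditionally in put_nilfs() keeps the final decrement atomic with the list removal, so find_or_create_nilfs() can never take a reference on an object that is mid-teardown. For comparison only, the kernel also offers atomic_dec_and_lock(), which skips the lock on the fast path; an equivalent sketch:

    if (!atomic_dec_and_lock(&nilfs->ns_count, &nilfs_lock))
            return;                 /* references remain; lock never taken */
    list_del_init(&nilfs->ns_list); /* now unreachable to lookups */
    spin_unlock(&nilfs_lock);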
@@ -515,7 +568,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
515 | 568 | ||
516 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); | 569 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); |
517 | if (sb->s_blocksize != blocksize) { | 570 | if (sb->s_blocksize != blocksize) { |
518 | int hw_blocksize = bdev_hardsect_size(sb->s_bdev); | 571 | int hw_blocksize = bdev_logical_block_size(sb->s_bdev); |
519 | 572 | ||
520 | if (blocksize < hw_blocksize) { | 573 | if (blocksize < hw_blocksize) { |
521 | printk(KERN_ERR | 574 | printk(KERN_ERR |
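The bdev_hardsect_size() to bdev_logical_block_size() change here is not nilfs-specific; it tracks a block-layer rename from the same release cycle, and the returned value, the device's logical block size in bytes, is unchanged.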
@@ -613,13 +666,63 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs) | |||
613 | return ret; | 666 | return ret; |
614 | } | 667 | } |
615 | 668 | ||
669 | /** | ||
670 | * nilfs_find_sbinfo - find existing nilfs_sb_info structure | ||
671 | * @nilfs: nilfs object | ||
672 | * @rw_mount: mount type (non-zero value for read/write mount) | ||
673 | * @cno: checkpoint number (zero for read-only mount) | ||
674 | * | ||
 675 | * nilfs_find_sbinfo() returns the nilfs_sb_info structure matching | ||
 676 | * @rw_mount and, in case of snapshots, @cno. If no instance is | ||
 677 | * found, NULL is returned. Although the super block instance can | ||
 678 | * be unmounted after this function returns, the nilfs_sb_info struct | ||
 679 | * is kept in memory until nilfs_put_sbinfo() is called. | ||
680 | */ | ||
681 | struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs, | ||
682 | int rw_mount, __u64 cno) | ||
683 | { | ||
684 | struct nilfs_sb_info *sbi; | ||
685 | |||
686 | down_read(&nilfs->ns_super_sem); | ||
687 | /* | ||
688 | * The SNAPSHOT flag and sb->s_flags are supposed to be | ||
689 | * protected with nilfs->ns_super_sem. | ||
690 | */ | ||
691 | sbi = nilfs->ns_current; | ||
692 | if (rw_mount) { | ||
693 | if (sbi && !(sbi->s_super->s_flags & MS_RDONLY)) | ||
694 | goto found; /* read/write mount */ | ||
695 | else | ||
696 | goto out; | ||
697 | } else if (cno == 0) { | ||
698 | if (sbi && (sbi->s_super->s_flags & MS_RDONLY)) | ||
699 | goto found; /* read-only mount */ | ||
700 | else | ||
701 | goto out; | ||
702 | } | ||
703 | |||
704 | list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { | ||
705 | if (nilfs_test_opt(sbi, SNAPSHOT) && | ||
706 | sbi->s_snapshot_cno == cno) | ||
707 | goto found; /* snapshot mount */ | ||
708 | } | ||
709 | out: | ||
710 | up_read(&nilfs->ns_super_sem); | ||
711 | return NULL; | ||
712 | |||
713 | found: | ||
714 | atomic_inc(&sbi->s_count); | ||
715 | up_read(&nilfs->ns_super_sem); | ||
716 | return sbi; | ||
717 | } | ||
718 | |||
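The returned sbi is reference-counted; a hypothetical caller (this is the pairing nilfs_get_sb() uses above, with flags and cno taken from the mount request) must drop it with nilfs_put_sbinfo():

    struct nilfs_sb_info *sbi;

    sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), cno);
    if (sbi) {
            /* ... e.g. hand sbi to sget() via nilfs_super_data ... */
            nilfs_put_sbinfo(sbi);  /* drop the lookup's reference */
    }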
616 | int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, | 719 | int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, |
617 | int snapshot_mount) | 720 | int snapshot_mount) |
618 | { | 721 | { |
619 | struct nilfs_sb_info *sbi; | 722 | struct nilfs_sb_info *sbi; |
620 | int ret = 0; | 723 | int ret = 0; |
621 | 724 | ||
622 | down_read(&nilfs->ns_sem); | 725 | down_read(&nilfs->ns_super_sem); |
623 | if (cno == 0 || cno > nilfs->ns_cno) | 726 | if (cno == 0 || cno > nilfs->ns_cno) |
624 | goto out_unlock; | 727 | goto out_unlock; |
625 | 728 | ||
@@ -636,6 +739,6 @@ int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, | |||
636 | ret++; | 739 | ret++; |
637 | 740 | ||
638 | out_unlock: | 741 | out_unlock: |
639 | up_read(&nilfs->ns_sem); | 742 | up_read(&nilfs->ns_super_sem); |
640 | return ret; | 743 | return ret; |
641 | } | 744 | } |
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 30fe58778d05..e8adbffc626f 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -43,12 +43,16 @@ enum { | |||
43 | * struct the_nilfs - struct to supervise multiple nilfs mount points | 43 | * struct the_nilfs - struct to supervise multiple nilfs mount points |
44 | * @ns_flags: flags | 44 | * @ns_flags: flags |
45 | * @ns_count: reference count | 45 | * @ns_count: reference count |
46 | * @ns_list: list head for nilfs_list | ||
46 | * @ns_bdev: block device | 47 | * @ns_bdev: block device |
47 | * @ns_bdi: backing dev info | 48 | * @ns_bdi: backing dev info |
48 | * @ns_writer: back pointer to writable nilfs_sb_info | 49 | * @ns_writer: back pointer to writable nilfs_sb_info |
49 | * @ns_sem: semaphore for shared states | 50 | * @ns_sem: semaphore for shared states |
51 | * @ns_super_sem: semaphore for global operations across super block instances | ||
52 | * @ns_mount_mutex: mutex protecting mount process of nilfs | ||
50 | * @ns_writer_mutex: mutex protecting ns_writer attach/detach | 53 | * @ns_writer_mutex: mutex protecting ns_writer attach/detach |
51 | * @ns_writer_refcount: number of referrers on ns_writer | 54 | * @ns_writer_refcount: number of referrers on ns_writer |
55 | * @ns_current: back pointer to current mount | ||
52 | * @ns_sbh: buffer heads of on-disk super blocks | 56 | * @ns_sbh: buffer heads of on-disk super blocks |
53 | * @ns_sbp: pointers to super block data | 57 | * @ns_sbp: pointers to super block data |
54 | * @ns_sbwtime: previous write time of super blocks | 58 | * @ns_sbwtime: previous write time of super blocks |
@@ -88,15 +92,24 @@ enum { | |||
88 | struct the_nilfs { | 92 | struct the_nilfs { |
89 | unsigned long ns_flags; | 93 | unsigned long ns_flags; |
90 | atomic_t ns_count; | 94 | atomic_t ns_count; |
95 | struct list_head ns_list; | ||
91 | 96 | ||
92 | struct block_device *ns_bdev; | 97 | struct block_device *ns_bdev; |
93 | struct backing_dev_info *ns_bdi; | 98 | struct backing_dev_info *ns_bdi; |
94 | struct nilfs_sb_info *ns_writer; | 99 | struct nilfs_sb_info *ns_writer; |
95 | struct rw_semaphore ns_sem; | 100 | struct rw_semaphore ns_sem; |
101 | struct rw_semaphore ns_super_sem; | ||
102 | struct mutex ns_mount_mutex; | ||
96 | struct mutex ns_writer_mutex; | 103 | struct mutex ns_writer_mutex; |
97 | atomic_t ns_writer_refcount; | 104 | atomic_t ns_writer_refcount; |
98 | 105 | ||
99 | /* | 106 | /* |
107 | * components protected by ns_super_sem | ||
108 | */ | ||
109 | struct nilfs_sb_info *ns_current; | ||
110 | struct list_head ns_supers; | ||
111 | |||
112 | /* | ||
100 | * used for | 113 | * used for |
101 | * - loading the latest checkpoint exclusively. | 114 | * - loading the latest checkpoint exclusively. |
102 | * - allocating a new full segment. | 115 | * - allocating a new full segment. |
@@ -108,7 +121,6 @@ struct the_nilfs { | |||
108 | time_t ns_sbwtime[2]; | 121 | time_t ns_sbwtime[2]; |
109 | unsigned ns_sbsize; | 122 | unsigned ns_sbsize; |
110 | unsigned ns_mount_state; | 123 | unsigned ns_mount_state; |
111 | struct list_head ns_supers; | ||
112 | 124 | ||
113 | /* | 125 | /* |
114 | * Following fields are dedicated to a writable FS-instance. | 126 | * Following fields are dedicated to a writable FS-instance. |
@@ -191,11 +203,12 @@ THE_NILFS_FNS(DISCONTINUED, discontinued) | |||
191 | #define NILFS_ALTSB_FREQ 60 /* spare superblock */ | 203 | #define NILFS_ALTSB_FREQ 60 /* spare superblock */ |
192 | 204 | ||
193 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); | 205 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); |
194 | struct the_nilfs *alloc_nilfs(struct block_device *); | 206 | struct the_nilfs *find_or_create_nilfs(struct block_device *); |
195 | void put_nilfs(struct the_nilfs *); | 207 | void put_nilfs(struct the_nilfs *); |
196 | int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); | 208 | int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); |
197 | int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); | 209 | int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); |
198 | int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); | 210 | int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); |
211 | struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); | ||
199 | int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); | 212 | int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); |
200 | int nilfs_near_disk_full(struct the_nilfs *); | 213 | int nilfs_near_disk_full(struct the_nilfs *); |
201 | void nilfs_fall_back_super_block(struct the_nilfs *); | 214 | void nilfs_fall_back_super_block(struct the_nilfs *); |
@@ -238,6 +251,12 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
238 | mutex_unlock(&nilfs->ns_writer_mutex); | 251 | mutex_unlock(&nilfs->ns_writer_mutex); |
239 | } | 252 | } |
240 | 253 | ||
254 | static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) | ||
255 | { | ||
 256 | if (atomic_dec_and_test(&sbi->s_count)) | ||
257 | kfree(sbi); | ||
258 | } | ||
259 | |||
241 | static inline void | 260 | static inline void |
242 | nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, | 261 | nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, |
243 | sector_t *seg_start, sector_t *seg_end) | 262 | sector_t *seg_start, sector_t *seg_end) |
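The header now carries three layers of synchronization. Reading the super.c changes above, the implied ordering (outermost first) appears to be as follows; this is an inference from the patch, not something it states:

    /*
     * ns_mount_mutex  - serializes whole mount/umount attempts per device
     *   ns_super_sem  - guards ns_current and the ns_supers list
     *     ns_sem      - shared on-disk state (super block buffers etc.)
     */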
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 50914d7303c6..31dac7e3b0f1 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig | |||
@@ -1,2 +1,15 @@ | |||
1 | config FSNOTIFY | ||
2 | bool "Filesystem notification backend" | ||
3 | default y | ||
4 | ---help--- | ||
5 | fsnotify is a backend for filesystem notification. fsnotify does | ||
6 | not provide any userspace interface but does provide the basis | ||
7 | needed for other notification schemes such as dnotify, inotify, | ||
8 | and fanotify. | ||
9 | |||
 10 | Say Y here to enable fsnotify support. | ||
11 | |||
12 | If unsure, say Y. | ||
13 | |||
1 | source "fs/notify/dnotify/Kconfig" | 14 | source "fs/notify/dnotify/Kconfig" |
2 | source "fs/notify/inotify/Kconfig" | 15 | source "fs/notify/inotify/Kconfig" |
diff --git a/fs/notify/Makefile b/fs/notify/Makefile index 5a95b6010ce7..0922cc826c46 100644 --- a/fs/notify/Makefile +++ b/fs/notify/Makefile | |||
@@ -1,2 +1,4 @@ | |||
1 | obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o | ||
2 | |||
1 | obj-y += dnotify/ | 3 | obj-y += dnotify/ |
2 | obj-y += inotify/ | 4 | obj-y += inotify/ |
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig index 26adf5dfa646..904ff8d5405a 100644 --- a/fs/notify/dnotify/Kconfig +++ b/fs/notify/dnotify/Kconfig | |||
@@ -1,5 +1,6 @@ | |||
1 | config DNOTIFY | 1 | config DNOTIFY |
2 | bool "Dnotify support" | 2 | bool "Dnotify support" |
3 | depends on FSNOTIFY | ||
3 | default y | 4 | default y |
4 | help | 5 | help |
5 | Dnotify is a directory-based per-fd file change notification system | 6 | Dnotify is a directory-based per-fd file change notification system |
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index b0aa2cde80bd..828a889be909 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c | |||
@@ -3,6 +3,9 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) 2000,2001,2002 Stephen Rothwell | 4 | * Copyright (C) 2000,2001,2002 Stephen Rothwell |
5 | * | 5 | * |
6 | * Copyright (C) 2009 Eric Paris <Red Hat Inc> | ||
 7 | * dnotify was largely rewritten to use the new fsnotify infrastructure | ||
8 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | 9 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms of the GNU General Public License as published by the | 10 | * under the terms of the GNU General Public License as published by the |
8 | * Free Software Foundation; either version 2, or (at your option) any | 11 | * Free Software Foundation; either version 2, or (at your option) any |
@@ -21,24 +24,173 @@ | |||
21 | #include <linux/spinlock.h> | 24 | #include <linux/spinlock.h> |
22 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
23 | #include <linux/fdtable.h> | 26 | #include <linux/fdtable.h> |
27 | #include <linux/fsnotify_backend.h> | ||
24 | 28 | ||
25 | int dir_notify_enable __read_mostly = 1; | 29 | int dir_notify_enable __read_mostly = 1; |
26 | 30 | ||
27 | static struct kmem_cache *dn_cache __read_mostly; | 31 | static struct kmem_cache *dnotify_struct_cache __read_mostly; |
32 | static struct kmem_cache *dnotify_mark_entry_cache __read_mostly; | ||
33 | static struct fsnotify_group *dnotify_group __read_mostly; | ||
34 | static DEFINE_MUTEX(dnotify_mark_mutex); | ||
35 | |||
36 | /* | ||
37 | * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which | ||
38 | * is being watched by dnotify. If multiple userspace applications are watching | ||
 39 | * the same directory with dnotify, their information is chained in dn. | ||
40 | */ | ||
41 | struct dnotify_mark_entry { | ||
42 | struct fsnotify_mark_entry fsn_entry; | ||
43 | struct dnotify_struct *dn; | ||
44 | }; | ||
28 | 45 | ||
29 | static void redo_inode_mask(struct inode *inode) | 46 | /* |
 47 | * When a process starts or stops watching an inode, the set of events which | ||
48 | * dnotify cares about for that inode may change. This function runs the | ||
49 | * list of everything receiving dnotify events about this directory and calculates | ||
50 | * the set of all those events. After it updates what dnotify is interested in | ||
51 | * it calls the fsnotify function so it can update the set of all events relevant | ||
52 | * to this inode. | ||
53 | */ | ||
54 | static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry) | ||
30 | { | 55 | { |
31 | unsigned long new_mask; | 56 | __u32 new_mask, old_mask; |
32 | struct dnotify_struct *dn; | 57 | struct dnotify_struct *dn; |
58 | struct dnotify_mark_entry *dnentry = container_of(entry, | ||
59 | struct dnotify_mark_entry, | ||
60 | fsn_entry); | ||
61 | |||
62 | assert_spin_locked(&entry->lock); | ||
33 | 63 | ||
64 | old_mask = entry->mask; | ||
34 | new_mask = 0; | 65 | new_mask = 0; |
35 | for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next) | 66 | for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next) |
36 | new_mask |= dn->dn_mask & ~DN_MULTISHOT; | 67 | new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); |
37 | inode->i_dnotify_mask = new_mask; | 68 | entry->mask = new_mask; |
69 | |||
70 | if (old_mask == new_mask) | ||
71 | return; | ||
72 | |||
73 | if (entry->inode) | ||
74 | fsnotify_recalc_inode_mask(entry->inode); | ||
75 | } | ||
76 | |||
77 | /* | ||
 78 | * Main fsnotify call where events are delivered to dnotify. | ||
 79 | * Find the dnotify mark on the relevant inode, run the list of dnotify structs | ||
 80 | * on that mark and determine which of them has expressed interest in receiving | ||
 81 | * events of this type. When one is found, send the signal to the registered | ||
 82 | * process and destroy the dnotify struct if it was not registered to receive | ||
 83 | * multiple events. | ||
84 | */ | ||
85 | static int dnotify_handle_event(struct fsnotify_group *group, | ||
86 | struct fsnotify_event *event) | ||
87 | { | ||
88 | struct fsnotify_mark_entry *entry = NULL; | ||
89 | struct dnotify_mark_entry *dnentry; | ||
90 | struct inode *to_tell; | ||
91 | struct dnotify_struct *dn; | ||
92 | struct dnotify_struct **prev; | ||
93 | struct fown_struct *fown; | ||
94 | |||
95 | to_tell = event->to_tell; | ||
96 | |||
97 | spin_lock(&to_tell->i_lock); | ||
98 | entry = fsnotify_find_mark_entry(group, to_tell); | ||
99 | spin_unlock(&to_tell->i_lock); | ||
100 | |||
 101 | /* unlikely since we already passed dnotify_should_send_event() */ | ||
102 | if (unlikely(!entry)) | ||
103 | return 0; | ||
104 | dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); | ||
105 | |||
106 | spin_lock(&entry->lock); | ||
107 | prev = &dnentry->dn; | ||
108 | while ((dn = *prev) != NULL) { | ||
109 | if ((dn->dn_mask & event->mask) == 0) { | ||
110 | prev = &dn->dn_next; | ||
111 | continue; | ||
112 | } | ||
113 | fown = &dn->dn_filp->f_owner; | ||
114 | send_sigio(fown, dn->dn_fd, POLL_MSG); | ||
115 | if (dn->dn_mask & FS_DN_MULTISHOT) | ||
116 | prev = &dn->dn_next; | ||
117 | else { | ||
118 | *prev = dn->dn_next; | ||
119 | kmem_cache_free(dnotify_struct_cache, dn); | ||
120 | dnotify_recalc_inode_mask(entry); | ||
121 | } | ||
122 | } | ||
123 | |||
124 | spin_unlock(&entry->lock); | ||
125 | fsnotify_put_mark(entry); | ||
126 | |||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Given an inode and mask determine if dnotify would be interested in sending | ||
132 | * userspace notification for that pair. | ||
133 | */ | ||
134 | static bool dnotify_should_send_event(struct fsnotify_group *group, | ||
135 | struct inode *inode, __u32 mask) | ||
136 | { | ||
137 | struct fsnotify_mark_entry *entry; | ||
138 | bool send; | ||
139 | |||
140 | /* !dir_notify_enable should never get here, don't waste time checking | ||
141 | if (!dir_notify_enable) | ||
142 | return 0; */ | ||
143 | |||
144 | /* not a dir, dnotify doesn't care */ | ||
145 | if (!S_ISDIR(inode->i_mode)) | ||
146 | return false; | ||
147 | |||
148 | spin_lock(&inode->i_lock); | ||
149 | entry = fsnotify_find_mark_entry(group, inode); | ||
150 | spin_unlock(&inode->i_lock); | ||
151 | |||
152 | /* no mark means no dnotify watch */ | ||
153 | if (!entry) | ||
154 | return false; | ||
155 | |||
156 | mask = (mask & ~FS_EVENT_ON_CHILD); | ||
157 | send = (mask & entry->mask); | ||
158 | |||
159 | fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */ | ||
160 | |||
161 | return send; | ||
162 | } | ||
163 | |||
164 | static void dnotify_free_mark(struct fsnotify_mark_entry *entry) | ||
165 | { | ||
166 | struct dnotify_mark_entry *dnentry = container_of(entry, | ||
167 | struct dnotify_mark_entry, | ||
168 | fsn_entry); | ||
169 | |||
170 | BUG_ON(dnentry->dn); | ||
171 | |||
172 | kmem_cache_free(dnotify_mark_entry_cache, dnentry); | ||
38 | } | 173 | } |
39 | 174 | ||
175 | static struct fsnotify_ops dnotify_fsnotify_ops = { | ||
176 | .handle_event = dnotify_handle_event, | ||
177 | .should_send_event = dnotify_should_send_event, | ||
178 | .free_group_priv = NULL, | ||
179 | .freeing_mark = NULL, | ||
180 | .free_event_priv = NULL, | ||
181 | }; | ||
182 | |||
183 | /* | ||
184 | * Called every time a file is closed. Looks first for a dnotify mark on the | ||
185 | * inode. If one is found run all of the ->dn entries attached to that | ||
186 | * mark for one relevant to this process closing the file and remove that | ||
187 | * dnotify_struct. If that was the last dnotify_struct also remove the | ||
188 | * fsnotify_mark_entry. | ||
189 | */ | ||
40 | void dnotify_flush(struct file *filp, fl_owner_t id) | 190 | void dnotify_flush(struct file *filp, fl_owner_t id) |
41 | { | 191 | { |
192 | struct fsnotify_mark_entry *entry; | ||
193 | struct dnotify_mark_entry *dnentry; | ||
42 | struct dnotify_struct *dn; | 194 | struct dnotify_struct *dn; |
43 | struct dnotify_struct **prev; | 195 | struct dnotify_struct **prev; |
44 | struct inode *inode; | 196 | struct inode *inode; |
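The container_of() step repeated throughout this file recovers the dnotify wrapper from the generic fsnotify mark; a small helper capturing the pattern (hypothetical, not in the patch):

    static inline struct dnotify_mark_entry *
    to_dnentry(struct fsnotify_mark_entry *entry)
    {
            /* fsn_entry happens to be the first member, but
             * container_of() does not depend on that */
            return container_of(entry, struct dnotify_mark_entry, fsn_entry);
    }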
@@ -46,145 +198,243 @@ void dnotify_flush(struct file *filp, fl_owner_t id) | |||
46 | inode = filp->f_path.dentry->d_inode; | 198 | inode = filp->f_path.dentry->d_inode; |
47 | if (!S_ISDIR(inode->i_mode)) | 199 | if (!S_ISDIR(inode->i_mode)) |
48 | return; | 200 | return; |
201 | |||
49 | spin_lock(&inode->i_lock); | 202 | spin_lock(&inode->i_lock); |
50 | prev = &inode->i_dnotify; | 203 | entry = fsnotify_find_mark_entry(dnotify_group, inode); |
204 | spin_unlock(&inode->i_lock); | ||
205 | if (!entry) | ||
206 | return; | ||
207 | dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); | ||
208 | |||
209 | mutex_lock(&dnotify_mark_mutex); | ||
210 | |||
211 | spin_lock(&entry->lock); | ||
212 | prev = &dnentry->dn; | ||
51 | while ((dn = *prev) != NULL) { | 213 | while ((dn = *prev) != NULL) { |
52 | if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { | 214 | if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { |
53 | *prev = dn->dn_next; | 215 | *prev = dn->dn_next; |
54 | redo_inode_mask(inode); | 216 | kmem_cache_free(dnotify_struct_cache, dn); |
55 | kmem_cache_free(dn_cache, dn); | 217 | dnotify_recalc_inode_mask(entry); |
56 | break; | 218 | break; |
57 | } | 219 | } |
58 | prev = &dn->dn_next; | 220 | prev = &dn->dn_next; |
59 | } | 221 | } |
60 | spin_unlock(&inode->i_lock); | 222 | |
223 | spin_unlock(&entry->lock); | ||
224 | |||
225 | /* nothing else could have found us thanks to the dnotify_mark_mutex */ | ||
226 | if (dnentry->dn == NULL) | ||
227 | fsnotify_destroy_mark_by_entry(entry); | ||
228 | |||
229 | fsnotify_recalc_group_mask(dnotify_group); | ||
230 | |||
231 | mutex_unlock(&dnotify_mark_mutex); | ||
232 | |||
233 | fsnotify_put_mark(entry); | ||
234 | } | ||
235 | |||
236 | /* this conversion is done only at watch creation */ | ||
237 | static __u32 convert_arg(unsigned long arg) | ||
238 | { | ||
239 | __u32 new_mask = FS_EVENT_ON_CHILD; | ||
240 | |||
241 | if (arg & DN_MULTISHOT) | ||
242 | new_mask |= FS_DN_MULTISHOT; | ||
243 | if (arg & DN_DELETE) | ||
244 | new_mask |= (FS_DELETE | FS_MOVED_FROM); | ||
245 | if (arg & DN_MODIFY) | ||
246 | new_mask |= FS_MODIFY; | ||
247 | if (arg & DN_ACCESS) | ||
248 | new_mask |= FS_ACCESS; | ||
249 | if (arg & DN_ATTRIB) | ||
250 | new_mask |= FS_ATTRIB; | ||
251 | if (arg & DN_RENAME) | ||
252 | new_mask |= FS_DN_RENAME; | ||
253 | if (arg & DN_CREATE) | ||
254 | new_mask |= (FS_CREATE | FS_MOVED_TO); | ||
255 | |||
256 | return new_mask; | ||
61 | } | 257 | } |
62 | 258 | ||
259 | /* | ||
260 | * If multiple processes watch the same inode with dnotify there is only one | ||
261 | * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct | ||
262 | * onto that mark. This function either attaches the new dnotify_struct onto | ||
 263 | * that list, or it ORs the mask into an existing dnotify_struct. | ||
264 | */ | ||
265 | static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry, | ||
266 | fl_owner_t id, int fd, struct file *filp, __u32 mask) | ||
267 | { | ||
268 | struct dnotify_struct *odn; | ||
269 | |||
270 | odn = dnentry->dn; | ||
271 | while (odn != NULL) { | ||
 272 | /* adding more events to an existing dnotify_struct? */ | ||
273 | if ((odn->dn_owner == id) && (odn->dn_filp == filp)) { | ||
274 | odn->dn_fd = fd; | ||
275 | odn->dn_mask |= mask; | ||
276 | return -EEXIST; | ||
277 | } | ||
278 | odn = odn->dn_next; | ||
279 | } | ||
280 | |||
281 | dn->dn_mask = mask; | ||
282 | dn->dn_fd = fd; | ||
283 | dn->dn_filp = filp; | ||
284 | dn->dn_owner = id; | ||
285 | dn->dn_next = dnentry->dn; | ||
286 | dnentry->dn = dn; | ||
287 | |||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | /* | ||
292 | * When a process calls fcntl to attach a dnotify watch to a directory it ends | ||
293 | * up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be | ||
294 | * attached to the fsnotify_mark. | ||
295 | */ | ||
63 | int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | 296 | int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) |
64 | { | 297 | { |
298 | struct dnotify_mark_entry *new_dnentry, *dnentry; | ||
299 | struct fsnotify_mark_entry *new_entry, *entry; | ||
65 | struct dnotify_struct *dn; | 300 | struct dnotify_struct *dn; |
66 | struct dnotify_struct *odn; | ||
67 | struct dnotify_struct **prev; | ||
68 | struct inode *inode; | 301 | struct inode *inode; |
69 | fl_owner_t id = current->files; | 302 | fl_owner_t id = current->files; |
70 | struct file *f; | 303 | struct file *f; |
71 | int error = 0; | 304 | int destroy = 0, error = 0; |
305 | __u32 mask; | ||
306 | |||
307 | /* we use these to tell if we need to kfree */ | ||
308 | new_entry = NULL; | ||
309 | dn = NULL; | ||
310 | |||
311 | if (!dir_notify_enable) { | ||
312 | error = -EINVAL; | ||
313 | goto out_err; | ||
314 | } | ||
72 | 315 | ||
316 | /* a 0 mask means we are explicitly removing the watch */ | ||
73 | if ((arg & ~DN_MULTISHOT) == 0) { | 317 | if ((arg & ~DN_MULTISHOT) == 0) { |
74 | dnotify_flush(filp, id); | 318 | dnotify_flush(filp, id); |
75 | return 0; | 319 | error = 0; |
320 | goto out_err; | ||
76 | } | 321 | } |
77 | if (!dir_notify_enable) | 322 | |
78 | return -EINVAL; | 323 | /* dnotify only works on directories */ |
79 | inode = filp->f_path.dentry->d_inode; | 324 | inode = filp->f_path.dentry->d_inode; |
80 | if (!S_ISDIR(inode->i_mode)) | 325 | if (!S_ISDIR(inode->i_mode)) { |
81 | return -ENOTDIR; | 326 | error = -ENOTDIR; |
82 | dn = kmem_cache_alloc(dn_cache, GFP_KERNEL); | 327 | goto out_err; |
83 | if (dn == NULL) | ||
84 | return -ENOMEM; | ||
85 | spin_lock(&inode->i_lock); | ||
86 | prev = &inode->i_dnotify; | ||
87 | while ((odn = *prev) != NULL) { | ||
88 | if ((odn->dn_owner == id) && (odn->dn_filp == filp)) { | ||
89 | odn->dn_fd = fd; | ||
90 | odn->dn_mask |= arg; | ||
91 | inode->i_dnotify_mask |= arg & ~DN_MULTISHOT; | ||
92 | goto out_free; | ||
93 | } | ||
94 | prev = &odn->dn_next; | ||
95 | } | 328 | } |
96 | 329 | ||
97 | rcu_read_lock(); | 330 | /* expect most fcntl to add new rather than augment old */ |
98 | f = fcheck(fd); | 331 | dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL); |
99 | rcu_read_unlock(); | 332 | if (!dn) { |
100 | /* we'd lost the race with close(), sod off silently */ | 333 | error = -ENOMEM; |
101 | /* note that inode->i_lock prevents reordering problems | 334 | goto out_err; |
102 | * between accesses to descriptor table and ->i_dnotify */ | 335 | } |
103 | if (f != filp) | ||
104 | goto out_free; | ||
105 | 336 | ||
106 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); | 337 | /* new fsnotify mark, we expect most fcntl calls to add a new mark */ |
107 | if (error) | 338 | new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL); |
108 | goto out_free; | 339 | if (!new_dnentry) { |
340 | error = -ENOMEM; | ||
341 | goto out_err; | ||
342 | } | ||
109 | 343 | ||
110 | dn->dn_mask = arg; | 344 | /* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */ |
111 | dn->dn_fd = fd; | 345 | mask = convert_arg(arg); |
112 | dn->dn_filp = filp; | ||
113 | dn->dn_owner = id; | ||
114 | inode->i_dnotify_mask |= arg & ~DN_MULTISHOT; | ||
115 | dn->dn_next = inode->i_dnotify; | ||
116 | inode->i_dnotify = dn; | ||
117 | spin_unlock(&inode->i_lock); | ||
118 | return 0; | ||
119 | 346 | ||
120 | out_free: | 347 | /* set up the new_entry and new_dnentry */ |
121 | spin_unlock(&inode->i_lock); | 348 | new_entry = &new_dnentry->fsn_entry; |
122 | kmem_cache_free(dn_cache, dn); | 349 | fsnotify_init_mark(new_entry, dnotify_free_mark); |
123 | return error; | 350 | new_entry->mask = mask; |
124 | } | 351 | new_dnentry->dn = NULL; |
125 | 352 | ||
126 | void __inode_dir_notify(struct inode *inode, unsigned long event) | 353 | /* this is needed to prevent the fcntl/close race described below */ |
127 | { | 354 | mutex_lock(&dnotify_mark_mutex); |
128 | struct dnotify_struct * dn; | ||
129 | struct dnotify_struct **prev; | ||
130 | struct fown_struct * fown; | ||
131 | int changed = 0; | ||
132 | 355 | ||
356 | /* add the new_entry or find an old one. */ | ||
133 | spin_lock(&inode->i_lock); | 357 | spin_lock(&inode->i_lock); |
134 | prev = &inode->i_dnotify; | 358 | entry = fsnotify_find_mark_entry(dnotify_group, inode); |
135 | while ((dn = *prev) != NULL) { | ||
136 | if ((dn->dn_mask & event) == 0) { | ||
137 | prev = &dn->dn_next; | ||
138 | continue; | ||
139 | } | ||
140 | fown = &dn->dn_filp->f_owner; | ||
141 | send_sigio(fown, dn->dn_fd, POLL_MSG); | ||
142 | if (dn->dn_mask & DN_MULTISHOT) | ||
143 | prev = &dn->dn_next; | ||
144 | else { | ||
145 | *prev = dn->dn_next; | ||
146 | changed = 1; | ||
147 | kmem_cache_free(dn_cache, dn); | ||
148 | } | ||
149 | } | ||
150 | if (changed) | ||
151 | redo_inode_mask(inode); | ||
152 | spin_unlock(&inode->i_lock); | 359 | spin_unlock(&inode->i_lock); |
153 | } | 360 | if (entry) { |
154 | 361 | dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); | |
155 | EXPORT_SYMBOL(__inode_dir_notify); | 362 | spin_lock(&entry->lock); |
363 | } else { | ||
364 | fsnotify_add_mark(new_entry, dnotify_group, inode); | ||
365 | spin_lock(&new_entry->lock); | ||
366 | entry = new_entry; | ||
367 | dnentry = new_dnentry; | ||
368 | /* we used new_entry, so don't free it */ | ||
369 | new_entry = NULL; | ||
370 | } | ||
156 | 371 | ||
157 | /* | 372 | rcu_read_lock(); |
158 | * This is hopelessly wrong, but unfixable without API changes. At | 373 | f = fcheck(fd); |
159 | * least it doesn't oops the kernel... | 374 | rcu_read_unlock(); |
160 | * | ||
161 | * To safely access ->d_parent we need to keep d_move away from it. Use the | ||
162 | * dentry's d_lock for this. | ||
163 | */ | ||
164 | void dnotify_parent(struct dentry *dentry, unsigned long event) | ||
165 | { | ||
166 | struct dentry *parent; | ||
167 | 375 | ||
168 | if (!dir_notify_enable) | 376 | /* if (f != filp) means that we lost a race and another task/thread |
169 | return; | 377 | * actually closed the fd we are still playing with before we grabbed |
378 | * the dnotify_mark_mutex and entry->lock. Since closing the fd is the | ||
379 | * only time we clean up the mark entries we need to get our mark off | ||
380 | * the list. */ | ||
381 | if (f != filp) { | ||
 382 | /* if we added ourselves, shoot ourselves; it's possible that | ||
 383 | * the flush actually did shoot this entry. That's fine too | ||
 384 | * since multiple calls to destroy_mark are perfectly safe. If | ||
385 | * we found a dnentry already attached to the inode, just sod | ||
386 | * off silently as the flush at close time dealt with it. | ||
387 | */ | ||
388 | if (dnentry == new_dnentry) | ||
389 | destroy = 1; | ||
390 | goto out; | ||
391 | } | ||
170 | 392 | ||
171 | spin_lock(&dentry->d_lock); | 393 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
172 | parent = dentry->d_parent; | 394 | if (error) { |
173 | if (parent->d_inode->i_dnotify_mask & event) { | 395 | /* if we added, we must shoot */ |
174 | dget(parent); | 396 | if (dnentry == new_dnentry) |
175 | spin_unlock(&dentry->d_lock); | 397 | destroy = 1; |
176 | __inode_dir_notify(parent->d_inode, event); | 398 | goto out; |
177 | dput(parent); | ||
178 | } else { | ||
179 | spin_unlock(&dentry->d_lock); | ||
180 | } | 399 | } |
400 | |||
401 | error = attach_dn(dn, dnentry, id, fd, filp, mask); | ||
402 | /* !error means that we attached the dn to the dnentry, so don't free it */ | ||
403 | if (!error) | ||
404 | dn = NULL; | ||
405 | /* -EEXIST means that we didn't add this new dn and used an old one. | ||
 406 | * That isn't an error (and the unused dn should be freed) */ | ||
407 | else if (error == -EEXIST) | ||
408 | error = 0; | ||
409 | |||
410 | dnotify_recalc_inode_mask(entry); | ||
411 | out: | ||
412 | spin_unlock(&entry->lock); | ||
413 | |||
414 | if (destroy) | ||
415 | fsnotify_destroy_mark_by_entry(entry); | ||
416 | |||
417 | fsnotify_recalc_group_mask(dnotify_group); | ||
418 | |||
419 | mutex_unlock(&dnotify_mark_mutex); | ||
420 | fsnotify_put_mark(entry); | ||
421 | out_err: | ||
422 | if (new_entry) | ||
423 | fsnotify_put_mark(new_entry); | ||
424 | if (dn) | ||
425 | kmem_cache_free(dnotify_struct_cache, dn); | ||
426 | return error; | ||
181 | } | 427 | } |
182 | EXPORT_SYMBOL_GPL(dnotify_parent); | ||
183 | 428 | ||
184 | static int __init dnotify_init(void) | 429 | static int __init dnotify_init(void) |
185 | { | 430 | { |
186 | dn_cache = kmem_cache_create("dnotify_cache", | 431 | dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC); |
187 | sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL); | 432 | dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC); |
433 | |||
434 | dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM, | ||
435 | 0, &dnotify_fsnotify_ops); | ||
436 | if (IS_ERR(dnotify_group)) | ||
437 | panic("unable to allocate fsnotify group for dnotify\n"); | ||
188 | return 0; | 438 | return 0; |
189 | } | 439 | } |
190 | 440 | ||
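The userspace ABI served by all of the above is unchanged by the rewrite. For reference, a minimal dnotify consumer (illustrative only, error handling omitted):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static volatile sig_atomic_t notified;

    static void on_sigio(int sig)
    {
            (void)sig;
            notified = 1;
    }

    int main(void)
    {
            int fd = open(".", O_RDONLY);

            signal(SIGIO, on_sigio);
            fcntl(fd, F_SETOWN, getpid());  /* route SIGIO to this process */
            /* this call lands in fcntl_dirnotify() above */
            fcntl(fd, F_NOTIFY, DN_CREATE | DN_DELETE | DN_MULTISHOT);
            for (;;) {
                    pause();
                    if (notified) {
                            notified = 0;
                            puts("directory changed");
                    }
            }
    }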
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c new file mode 100644 index 000000000000..ec2f7bd76818 --- /dev/null +++ b/fs/notify/fsnotify.c | |||
@@ -0,0 +1,186 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; see the file COPYING. If not, write to | ||
16 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/dcache.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/srcu.h> | ||
24 | |||
25 | #include <linux/fsnotify_backend.h> | ||
26 | #include "fsnotify.h" | ||
27 | |||
28 | /* | ||
29 | * Clear all of the marks on an inode when it is being evicted from core | ||
30 | */ | ||
31 | void __fsnotify_inode_delete(struct inode *inode) | ||
32 | { | ||
33 | fsnotify_clear_marks_by_inode(inode); | ||
34 | } | ||
35 | EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); | ||
36 | |||
37 | /* | ||
38 | * Given an inode, first check if we care what happens to our children. Inotify | ||
39 | * and dnotify both tell their parents about events. If we care about any event | ||
40 | * on a child we run all of our children and set a dentry flag saying that the | ||
41 | * parent cares. Thus when an event happens on a child it can quickly tell if | ||
 42 | * there is a need to find a parent and send the event to the parent. | ||
43 | */ | ||
44 | void __fsnotify_update_child_dentry_flags(struct inode *inode) | ||
45 | { | ||
46 | struct dentry *alias; | ||
47 | int watched; | ||
48 | |||
49 | if (!S_ISDIR(inode->i_mode)) | ||
50 | return; | ||
51 | |||
52 | /* determine if the children should tell inode about their events */ | ||
53 | watched = fsnotify_inode_watches_children(inode); | ||
54 | |||
55 | spin_lock(&dcache_lock); | ||
56 | /* run all of the dentries associated with this inode. Since this is a | ||
57 | * directory, there damn well better only be one item on this list */ | ||
58 | list_for_each_entry(alias, &inode->i_dentry, d_alias) { | ||
59 | struct dentry *child; | ||
60 | |||
61 | /* run all of the children of the original inode and fix their | ||
62 | * d_flags to indicate parental interest (their parent is the | ||
63 | * original inode) */ | ||
64 | list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { | ||
65 | if (!child->d_inode) | ||
66 | continue; | ||
67 | |||
68 | spin_lock(&child->d_lock); | ||
69 | if (watched) | ||
70 | child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; | ||
71 | else | ||
72 | child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; | ||
73 | spin_unlock(&child->d_lock); | ||
74 | } | ||
75 | } | ||
76 | spin_unlock(&dcache_lock); | ||
77 | } | ||
78 | |||
79 | /* Notify this dentry's parent about a child's events. */ | ||
80 | void __fsnotify_parent(struct dentry *dentry, __u32 mask) | ||
81 | { | ||
82 | struct dentry *parent; | ||
83 | struct inode *p_inode; | ||
84 | bool send = false; | ||
85 | bool should_update_children = false; | ||
86 | |||
87 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) | ||
88 | return; | ||
89 | |||
90 | spin_lock(&dentry->d_lock); | ||
91 | parent = dentry->d_parent; | ||
92 | p_inode = parent->d_inode; | ||
93 | |||
94 | if (fsnotify_inode_watches_children(p_inode)) { | ||
95 | if (p_inode->i_fsnotify_mask & mask) { | ||
96 | dget(parent); | ||
97 | send = true; | ||
98 | } | ||
99 | } else { | ||
100 | /* | ||
 101 | * The parent doesn't care about events on its children but | ||
102 | * at least one child thought it did. We need to run all the | ||
103 | * children and update their d_flags to let them know p_inode | ||
104 | * doesn't care about them any more. | ||
105 | */ | ||
106 | dget(parent); | ||
107 | should_update_children = true; | ||
108 | } | ||
109 | |||
110 | spin_unlock(&dentry->d_lock); | ||
111 | |||
112 | if (send) { | ||
113 | /* we are notifying a parent so come up with the new mask which | ||
114 | * specifies these are events which came from a child. */ | ||
115 | mask |= FS_EVENT_ON_CHILD; | ||
116 | |||
117 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, | ||
118 | dentry->d_name.name, 0); | ||
119 | dput(parent); | ||
120 | } | ||
121 | |||
122 | if (unlikely(should_update_children)) { | ||
123 | __fsnotify_update_child_dentry_flags(p_inode); | ||
124 | dput(parent); | ||
125 | } | ||
126 | } | ||
127 | EXPORT_SYMBOL_GPL(__fsnotify_parent); | ||
128 | |||
129 | /* | ||
130 | * This is the main call to fsnotify. The VFS calls into hook specific functions | ||
131 | * in linux/fsnotify.h. Those functions then in turn call here. Here will call | ||
132 | * out to all of the registered fsnotify_group. Those groups can then use the | ||
133 | * notification event in whatever means they feel necessary. | ||
134 | */ | ||
135 | void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie) | ||
136 | { | ||
137 | struct fsnotify_group *group; | ||
138 | struct fsnotify_event *event = NULL; | ||
139 | int idx; | ||
140 | /* global tests shouldn't care about events on child only the specific event */ | ||
141 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); | ||
142 | |||
143 | if (list_empty(&fsnotify_groups)) | ||
144 | return; | ||
145 | |||
146 | if (!(test_mask & fsnotify_mask)) | ||
147 | return; | ||
148 | |||
149 | if (!(test_mask & to_tell->i_fsnotify_mask)) | ||
150 | return; | ||
151 | /* | ||
152 | * SRCU!! the groups list is very very much read only and the path is | ||
153 | * very hot. The VAST majority of events are not going to need to do | ||
154 | * anything other than walk the list so it's crazy to pre-allocate. | ||
155 | */ | ||
156 | idx = srcu_read_lock(&fsnotify_grp_srcu); | ||
157 | list_for_each_entry_rcu(group, &fsnotify_groups, group_list) { | ||
158 | if (test_mask & group->mask) { | ||
159 | if (!group->ops->should_send_event(group, to_tell, mask)) | ||
160 | continue; | ||
161 | if (!event) { | ||
162 | event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie); | ||
163 | /* shit, we OOM'd and now we can't tell, maybe | ||
164 | * someday someone else will want to do something | ||
165 | * here */ | ||
166 | if (!event) | ||
167 | break; | ||
168 | } | ||
169 | group->ops->handle_event(group, event); | ||
170 | } | ||
171 | } | ||
172 | srcu_read_unlock(&fsnotify_grp_srcu, idx); | ||
173 | /* | ||
174 | * fsnotify_create_event() took a reference so the event can't be cleaned | ||
175 | * up while we are still trying to add it to lists, drop that one. | ||
176 | */ | ||
177 | if (event) | ||
178 | fsnotify_put_event(event); | ||
179 | } | ||
180 | EXPORT_SYMBOL_GPL(fsnotify); | ||
181 | |||
182 | static __init int fsnotify_init(void) | ||
183 | { | ||
184 | return init_srcu_struct(&fsnotify_grp_srcu); | ||
185 | } | ||
186 | subsys_initcall(fsnotify_init); | ||
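Putting the pieces together: a backend registers a group carrying a mask and an ops table, then receives should_send_event()/handle_event() calls from fsnotify() above. A hedged sketch modelled on the dnotify conversion; EXAMPLE_GROUP_NUM and all example_* names are hypothetical (assumes linux/err.h and linux/init.h alongside the backend header):

    #include <linux/fsnotify_backend.h>

    static int example_handle_event(struct fsnotify_group *group,
                                    struct fsnotify_event *event)
    {
            printk(KERN_DEBUG "fsnotify: event mask 0x%x\n", event->mask);
            return 0;
    }

    static bool example_should_send_event(struct fsnotify_group *group,
                                          struct inode *inode, __u32 mask)
    {
            return true;    /* accept whatever the group mask let through */
    }

    static const struct fsnotify_ops example_ops = {
            .handle_event           = example_handle_event,
            .should_send_event      = example_should_send_event,
    };

    static int __init example_init(void)
    {
            struct fsnotify_group *group;

            group = fsnotify_obtain_group(EXAMPLE_GROUP_NUM,
                                          FS_CREATE | FS_DELETE,
                                          &example_ops);
            return IS_ERR(group) ? PTR_ERR(group) : 0;
    }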
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h new file mode 100644 index 000000000000..4dc240824b2d --- /dev/null +++ b/fs/notify/fsnotify.h | |||
@@ -0,0 +1,34 @@ | |||
1 | #ifndef __FS_NOTIFY_FSNOTIFY_H_ | ||
2 | #define __FS_NOTIFY_FSNOTIFY_H_ | ||
3 | |||
4 | #include <linux/list.h> | ||
5 | #include <linux/fsnotify.h> | ||
6 | #include <linux/srcu.h> | ||
7 | #include <linux/types.h> | ||
8 | |||
9 | /* protects reads of fsnotify_groups */ | ||
10 | extern struct srcu_struct fsnotify_grp_srcu; | ||
11 | /* all groups which receive fsnotify events */ | ||
12 | extern struct list_head fsnotify_groups; | ||
13 | /* all bitwise OR of all event types (FS_*) for all fsnotify_groups */ | ||
14 | extern __u32 fsnotify_mask; | ||
15 | |||
16 | /* destroy all events sitting in this groups notification queue */ | ||
17 | extern void fsnotify_flush_notify(struct fsnotify_group *group); | ||
18 | |||
19 | /* final kfree of a group */ | ||
20 | extern void fsnotify_final_destroy_group(struct fsnotify_group *group); | ||
21 | |||
22 | /* run the list of all marks associated with inode and flag them to be freed */ | ||
23 | extern void fsnotify_clear_marks_by_inode(struct inode *inode); | ||
24 | /* | ||
25 | * update the dentry->d_flags of all of inode's children to indicate if inode cares | ||
26 | * about events that happen to its children. | ||
27 | */ | ||
28 | extern void __fsnotify_update_child_dentry_flags(struct inode *inode); | ||
29 | |||
30 | /* allocate and destroy and event holder to attach events to notification/access queues */ | ||
31 | extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void); | ||
32 | extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder); | ||
33 | |||
34 | #endif /* __FS_NOTIFY_FSNOTIFY_H_ */ | ||
diff --git a/fs/notify/group.c b/fs/notify/group.c new file mode 100644 index 000000000000..0e1677144bc5 --- /dev/null +++ b/fs/notify/group.c | |||
@@ -0,0 +1,254 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; see the file COPYING. If not, write to | ||
16 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/list.h> | ||
20 | #include <linux/mutex.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/srcu.h> | ||
23 | #include <linux/rculist.h> | ||
24 | #include <linux/wait.h> | ||
25 | |||
26 | #include <linux/fsnotify_backend.h> | ||
27 | #include "fsnotify.h" | ||
28 | |||
29 | #include <asm/atomic.h> | ||
30 | |||
31 | /* protects writes to fsnotify_groups and fsnotify_mask */ | ||
32 | static DEFINE_MUTEX(fsnotify_grp_mutex); | ||
33 | /* protects reads while running the fsnotify_groups list */ | ||
34 | struct srcu_struct fsnotify_grp_srcu; | ||
35 | /* all groups registered to receive filesystem notifications */ | ||
36 | LIST_HEAD(fsnotify_groups); | ||
37 | /* bitwise OR of all events (FS_*) interesting to some group on this system */ | ||
38 | __u32 fsnotify_mask; | ||
39 | |||
40 | /* | ||
 41 | * When a new group registers or changes its set of interesting events | ||
42 | * this function updates the fsnotify_mask to contain all interesting events | ||
43 | */ | ||
44 | void fsnotify_recalc_global_mask(void) | ||
45 | { | ||
46 | struct fsnotify_group *group; | ||
47 | __u32 mask = 0; | ||
48 | int idx; | ||
49 | |||
50 | idx = srcu_read_lock(&fsnotify_grp_srcu); | ||
51 | list_for_each_entry_rcu(group, &fsnotify_groups, group_list) | ||
52 | mask |= group->mask; | ||
53 | srcu_read_unlock(&fsnotify_grp_srcu, idx); | ||
54 | fsnotify_mask = mask; | ||
55 | } | ||
56 | |||
57 | /* | ||
58 | * Update the group->mask by running all of the marks associated with this | ||
59 | * group and finding the bitwise | of all of the mark->mask. If we change | ||
60 | * the group->mask we need to update the global mask of events interesting | ||
61 | * to the system. | ||
62 | */ | ||
63 | void fsnotify_recalc_group_mask(struct fsnotify_group *group) | ||
64 | { | ||
65 | __u32 mask = 0; | ||
66 | __u32 old_mask = group->mask; | ||
67 | struct fsnotify_mark_entry *entry; | ||
68 | |||
69 | spin_lock(&group->mark_lock); | ||
70 | list_for_each_entry(entry, &group->mark_entries, g_list) | ||
71 | mask |= entry->mask; | ||
72 | spin_unlock(&group->mark_lock); | ||
73 | |||
74 | group->mask = mask; | ||
75 | |||
76 | if (old_mask != mask) | ||
77 | fsnotify_recalc_global_mask(); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Take a reference to a group so things found under the fsnotify_grp_mutex | ||
82 | * can't get freed under us | ||
83 | */ | ||
84 | static void fsnotify_get_group(struct fsnotify_group *group) | ||
85 | { | ||
86 | atomic_inc(&group->refcnt); | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * Final freeing of a group | ||
91 | */ | ||
92 | void fsnotify_final_destroy_group(struct fsnotify_group *group) | ||
93 | { | ||
94 | /* clear the notification queue of all events */ | ||
95 | fsnotify_flush_notify(group); | ||
96 | |||
97 | if (group->ops->free_group_priv) | ||
98 | group->ops->free_group_priv(group); | ||
99 | |||
100 | kfree(group); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Trying to get rid of a group. We need to first get rid of any outstanding | ||
105 | * allocations and then free the group. Remember that fsnotify_clear_marks_by_group | ||
106 | * could miss marks that are being freed by inode and those marks could still | ||
 107 | * hold a reference to this group (via group->num_marks). If we get into that | ||
 108 | * situation, the fsnotify_final_destroy_group will get called when that final | ||
109 | * mark is freed. | ||
110 | */ | ||
111 | static void fsnotify_destroy_group(struct fsnotify_group *group) | ||
112 | { | ||
113 | /* clear all inode mark entries for this group */ | ||
114 | fsnotify_clear_marks_by_group(group); | ||
115 | |||
116 | /* past the point of no return, matches the initial value of 1 */ | ||
117 | if (atomic_dec_and_test(&group->num_marks)) | ||
118 | fsnotify_final_destroy_group(group); | ||
119 | } | ||
120 | |||
121 | /* | ||
 122 | * Remove this group from the global list of groups that will get events. | ||
 123 | * This can be done even if there are still references and things still using | ||
124 | * this group. This just stops the group from getting new events. | ||
125 | */ | ||
126 | static void __fsnotify_evict_group(struct fsnotify_group *group) | ||
127 | { | ||
128 | BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex)); | ||
129 | |||
130 | if (group->on_group_list) | ||
131 | list_del_rcu(&group->group_list); | ||
132 | group->on_group_list = 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Called when a group is no longer interested in getting events. This can be | ||
137 | * used if a group is misbehaving or if for some reason a group should no longer | ||
138 | * get any filesystem events. | ||
139 | */ | ||
140 | void fsnotify_evict_group(struct fsnotify_group *group) | ||
141 | { | ||
142 | mutex_lock(&fsnotify_grp_mutex); | ||
143 | __fsnotify_evict_group(group); | ||
144 | mutex_unlock(&fsnotify_grp_mutex); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * Drop a reference to a group. Free it if it's through. | ||
149 | */ | ||
150 | void fsnotify_put_group(struct fsnotify_group *group) | ||
151 | { | ||
152 | if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex)) | ||
153 | return; | ||
154 | |||
155 | /* | ||
156 | * OK, now we know that there are no other users *and* we hold the mutex, | ||
157 | * so no new references will appear | ||
158 | */ | ||
159 | __fsnotify_evict_group(group); | ||
160 | |||
161 | /* | ||
162 | * now it's off the list, so the only thing we might care about is | ||
163 | * srcu access.... | ||
164 | */ | ||
165 | mutex_unlock(&fsnotify_grp_mutex); | ||
166 | synchronize_srcu(&fsnotify_grp_srcu); | ||
167 | |||
168 | /* and now it is really dead. _Nothing_ could be seeing it */ | ||
169 | fsnotify_recalc_global_mask(); | ||
170 | fsnotify_destroy_group(group); | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * Simply run the fsnotify_groups list and find a group which matches | ||
175 | * the given parameters. If a group is found we take a reference to that | ||
176 | * group. | ||
177 | */ | ||
178 | static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask, | ||
179 | const struct fsnotify_ops *ops) | ||
180 | { | ||
181 | struct fsnotify_group *group_iter; | ||
182 | struct fsnotify_group *group = NULL; | ||
183 | |||
184 | BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex)); | ||
185 | |||
186 | list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) { | ||
187 | if (group_iter->group_num == group_num) { | ||
188 | if ((group_iter->mask == mask) && | ||
189 | (group_iter->ops == ops)) { | ||
190 | fsnotify_get_group(group_iter); | ||
191 | group = group_iter; | ||
192 | } else | ||
193 | group = ERR_PTR(-EEXIST); | ||
194 | } | ||
195 | } | ||
196 | return group; | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * Either finds an existing group which matches the group_num, mask, and ops or | ||
201 | * creates a new group and adds it to the global group list. In either case we | ||
202 | * take a reference for the group returned. | ||
203 | */ | ||
204 | struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, | ||
205 | const struct fsnotify_ops *ops) | ||
206 | { | ||
207 | struct fsnotify_group *group, *tgroup; | ||
208 | |||
209 | /* very low use, simpler locking if we just always alloc */ | ||
210 | group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL); | ||
211 | if (!group) | ||
212 | return ERR_PTR(-ENOMEM); | ||
213 | |||
214 | atomic_set(&group->refcnt, 1); | ||
215 | |||
216 | group->on_group_list = 0; | ||
217 | group->group_num = group_num; | ||
218 | group->mask = mask; | ||
219 | |||
220 | mutex_init(&group->notification_mutex); | ||
221 | INIT_LIST_HEAD(&group->notification_list); | ||
222 | init_waitqueue_head(&group->notification_waitq); | ||
223 | group->q_len = 0; | ||
224 | group->max_events = UINT_MAX; | ||
225 | |||
226 | spin_lock_init(&group->mark_lock); | ||
227 | atomic_set(&group->num_marks, 0); | ||
228 | INIT_LIST_HEAD(&group->mark_entries); | ||
229 | |||
230 | group->ops = ops; | ||
231 | |||
232 | mutex_lock(&fsnotify_grp_mutex); | ||
233 | tgroup = fsnotify_find_group(group_num, mask, ops); | ||
234 | if (tgroup) { | ||
235 | /* group already exists */ | ||
236 | mutex_unlock(&fsnotify_grp_mutex); | ||
237 | /* destroy the new one we made */ | ||
238 | fsnotify_put_group(group); | ||
239 | return tgroup; | ||
240 | } | ||
241 | |||
242 | /* group not found, add a new one */ | ||
243 | list_add_rcu(&group->group_list, &fsnotify_groups); | ||
244 | group->on_group_list = 1; | ||
245 | /* being on the fsnotify_groups list holds one num_marks */ | ||
246 | atomic_inc(&group->num_marks); | ||
247 | |||
248 | mutex_unlock(&fsnotify_grp_mutex); | ||
249 | |||
250 | if (mask) | ||
251 | fsnotify_recalc_global_mask(); | ||
252 | |||
253 | return group; | ||
254 | } | ||
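Note how fsnotify_obtain_group() above never allocates memory while holding fsnotify_grp_mutex: it allocates the group up front, then takes the mutex only to look for a duplicate, discarding the fresh allocation if it lost the race. A minimal userspace sketch of that allocate-then-check-under-lock pattern, using a pthread mutex in place of fsnotify_grp_mutex and hypothetical group/find_group/obtain_group names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct group {
	unsigned int num;
	struct group *next;
};

static struct group *groups;			/* global list, like fsnotify_groups */
static pthread_mutex_t grp_mutex = PTHREAD_MUTEX_INITIALIZER;

/* caller must hold grp_mutex, mirroring fsnotify_find_group() */
static struct group *find_group(unsigned int num)
{
	struct group *g;

	for (g = groups; g; g = g->next)
		if (g->num == num)
			return g;
	return NULL;
}

static struct group *obtain_group(unsigned int num)
{
	struct group *g, *tg;

	g = malloc(sizeof(*g));			/* no lock held across the allocation */
	if (!g)
		return NULL;
	g->num = num;

	pthread_mutex_lock(&grp_mutex);
	tg = find_group(num);
	if (tg) {				/* lost the race: drop ours, reuse theirs */
		pthread_mutex_unlock(&grp_mutex);
		free(g);
		return tg;
	}
	g->next = groups;			/* won the race: publish the new group */
	groups = g;
	pthread_mutex_unlock(&grp_mutex);
	return g;
}

int main(void)
{
	struct group *a = obtain_group(1);
	struct group *b = obtain_group(1);	/* finds the group a created */

	printf("%s\n", a == b ? "reused existing group" : "duplicated");
	return 0;
}

The trade-off is an occasional wasted allocation in exchange for never sleeping in the allocator with the mutex held, which the in-code comment calls out as acceptable for a low-use path.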
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c new file mode 100644 index 000000000000..c8a07c65482b --- /dev/null +++ b/fs/notify/inode_mark.c | |||
@@ -0,0 +1,426 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; see the file COPYING. If not, write to | ||
16 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
17 | */ | ||
18 | |||
19 | /* | ||
20 | * fsnotify inode mark locking, lifetime, and refcounting | ||
21 | * | ||
22 | * REFCNT: | ||
23 | * The mark->refcnt tells how many "things" in the kernel currently are | ||
24 | * referencing this object. The object typically will live inside the kernel | ||
25 | * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task | ||
26 | * which can find this object holding the appropriete locks, can take a reference | ||
27 | * and the object itself is guarenteed to survive until the reference is dropped. | ||
28 | * | ||
29 | * LOCKING: | ||
30 | * There are 3 spinlocks involved with fsnotify inode marks and they MUST | ||
31 | * be taken in order as follows: | ||
32 | * | ||
33 | * entry->lock | ||
34 | * group->mark_lock | ||
35 | * inode->i_lock | ||
36 | * | ||
37 | * entry->lock protects 2 things, entry->group and entry->inode. You must hold | ||
38 | * that lock to dereference either of these things (they could be NULL even with | ||
39 | * the lock) | ||
40 | * | ||
41 | * group->mark_lock protects the mark_entries list anchored inside a given group | ||
42 | * and each entry is hooked via the g_list. It also sorta protects the | ||
43 | * free_g_list, which when used is anchored by a private list on the stack of the | ||
44 | * task which held the group->mark_lock. | ||
45 | * | ||
46 | * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a | ||
47 | * given inode and each entry is hooked via the i_list. (and sorta the | ||
48 | * free_i_list) | ||
49 | * | ||
50 | * | ||
51 | * LIFETIME: | ||
52 | * Inode marks survive between when they are added to an inode and when their | ||
53 | * refcnt==0. | ||
54 | * | ||
55 | * The inode mark can be cleared for a number of different reasons including: | ||
56 | * - The inode is unlinked for the last time. (fsnotify_inode_remove) | ||
57 | * - The inode is being evicted from cache. (fsnotify_inode_delete) | ||
58 | * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes) | ||
59 | * - Something explicitly requests that it be removed. (fsnotify_destroy_mark_by_entry) | ||
60 | * - The fsnotify_group associated with the mark is going away and all such marks | ||
61 | * need to be cleaned up. (fsnotify_clear_marks_by_group) | ||
62 | * | ||
63 | * Worst case we are given an inode and need to clean up all the marks on that | ||
64 | * inode. We take i_lock and walk the i_fsnotify_mark_entries safely. For each | ||
65 | * mark on the list we take a reference (so the mark can't disappear under us). | ||
66 | * We remove that mark from the inode's list of marks and we add this mark to a | ||
67 | * private list anchored on the stack using i_free_list. At this point we no | ||
68 | * longer fear anything finding the mark using the inode's list of marks. | ||
69 | * | ||
70 | * We can safely and locklessly run the private list on the stack of everything | ||
71 | * we just detached from the original inode. For each mark on the private list | ||
72 | * we grab the mark->lock and can thus dereference mark->group and mark->inode. If | ||
73 | * we see the group and inode are not NULL we take those locks. Now holding all | ||
74 | * 3 locks we can completely remove the mark from other tasks finding it in the | ||
75 | * future. Remember, 10 things might already be referencing this mark, but they | ||
76 | * had better be holding a ref. We drop the reference we took before we unhooked it | ||
77 | * from the inode. When the ref hits 0 we can free the mark. | ||
78 | * | ||
79 | * Very similarly for freeing by group, except we use free_g_list. | ||
80 | * | ||
81 | * This has the very interesting property of being able to run concurrently with | ||
82 | * any (or all) other directions. | ||
83 | */ | ||
84 | |||
85 | #include <linux/fs.h> | ||
86 | #include <linux/init.h> | ||
87 | #include <linux/kernel.h> | ||
88 | #include <linux/module.h> | ||
89 | #include <linux/mutex.h> | ||
90 | #include <linux/slab.h> | ||
91 | #include <linux/spinlock.h> | ||
92 | #include <linux/writeback.h> /* for inode_lock */ | ||
93 | |||
94 | #include <asm/atomic.h> | ||
95 | |||
96 | #include <linux/fsnotify_backend.h> | ||
97 | #include "fsnotify.h" | ||
98 | |||
99 | void fsnotify_get_mark(struct fsnotify_mark_entry *entry) | ||
100 | { | ||
101 | atomic_inc(&entry->refcnt); | ||
102 | } | ||
103 | |||
104 | void fsnotify_put_mark(struct fsnotify_mark_entry *entry) | ||
105 | { | ||
106 | if (atomic_dec_and_test(&entry->refcnt)) | ||
107 | entry->free_mark(entry); | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * Recalculate the mask of events relevant to a given inode; the caller must hold inode->i_lock. | ||
112 | */ | ||
113 | static void fsnotify_recalc_inode_mask_locked(struct inode *inode) | ||
114 | { | ||
115 | struct fsnotify_mark_entry *entry; | ||
116 | struct hlist_node *pos; | ||
117 | __u32 new_mask = 0; | ||
118 | |||
119 | assert_spin_locked(&inode->i_lock); | ||
120 | |||
121 | hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) | ||
122 | new_mask |= entry->mask; | ||
123 | inode->i_fsnotify_mask = new_mask; | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types | ||
128 | * any notifier is interested in hearing for this inode. | ||
129 | */ | ||
130 | void fsnotify_recalc_inode_mask(struct inode *inode) | ||
131 | { | ||
132 | spin_lock(&inode->i_lock); | ||
133 | fsnotify_recalc_inode_mask_locked(inode); | ||
134 | spin_unlock(&inode->i_lock); | ||
135 | |||
136 | __fsnotify_update_child_dentry_flags(inode); | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Any time a mark is getting freed we end up here. | ||
141 | * The caller had better be holding a reference to this mark so we don't actually | ||
142 | * do the final put under the entry->lock | ||
143 | */ | ||
144 | void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry) | ||
145 | { | ||
146 | struct fsnotify_group *group; | ||
147 | struct inode *inode; | ||
148 | |||
149 | spin_lock(&entry->lock); | ||
150 | |||
151 | group = entry->group; | ||
152 | inode = entry->inode; | ||
153 | |||
154 | BUG_ON(group && !inode); | ||
155 | BUG_ON(!group && inode); | ||
156 | |||
157 | /* if !group something else already marked this to die */ | ||
158 | if (!group) { | ||
159 | spin_unlock(&entry->lock); | ||
160 | return; | ||
161 | } | ||
162 | |||
163 | /* 1 from caller and 1 for being on i_list/g_list */ | ||
164 | BUG_ON(atomic_read(&entry->refcnt) < 2); | ||
165 | |||
166 | spin_lock(&group->mark_lock); | ||
167 | spin_lock(&inode->i_lock); | ||
168 | |||
169 | hlist_del_init(&entry->i_list); | ||
170 | entry->inode = NULL; | ||
171 | |||
172 | list_del_init(&entry->g_list); | ||
173 | entry->group = NULL; | ||
174 | |||
175 | fsnotify_put_mark(entry); /* for i_list and g_list */ | ||
176 | |||
177 | /* | ||
178 | * this mark is now off the inode->i_fsnotify_mark_entries list and we | ||
179 | * hold the inode->i_lock, so this is the perfect time to update the | ||
180 | * inode->i_fsnotify_mask | ||
181 | */ | ||
182 | fsnotify_recalc_inode_mask_locked(inode); | ||
183 | |||
184 | spin_unlock(&inode->i_lock); | ||
185 | spin_unlock(&group->mark_lock); | ||
186 | spin_unlock(&entry->lock); | ||
187 | |||
188 | /* | ||
189 | * Some groups like to know that marks are being freed. This is a | ||
190 | * callback to the group function to let it know that this entry | ||
191 | * is being freed. | ||
192 | */ | ||
193 | if (group->ops->freeing_mark) | ||
194 | group->ops->freeing_mark(entry, group); | ||
195 | |||
196 | /* | ||
197 | * __fsnotify_update_child_dentry_flags(inode); | ||
198 | * | ||
199 | * I really want to call that, but we can't: we have no idea whether the inode | ||
200 | * still exists the second we drop the entry->lock. | ||
201 | * | ||
202 | * The next time an event arrives at this inode from one of its children, | ||
203 | * __fsnotify_parent will see that the inode doesn't care about its | ||
204 | * children and will update all of these flags then. So really this | ||
205 | * is just a lazy update (and could be a perf win...) | ||
206 | */ | ||
207 | |||
208 | |||
209 | iput(inode); | ||
210 | |||
211 | /* | ||
212 | * it's possible that this group tried to destroy itself, but this | ||
213 | * mark was simultaneously being freed by the inode. If that's the | ||
214 | * case, we finish freeing the group here. | ||
215 | */ | ||
216 | if (unlikely(atomic_dec_and_test(&group->num_marks))) | ||
217 | fsnotify_final_destroy_group(group); | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * Given a group, destroy all of the marks associated with that group. | ||
222 | */ | ||
223 | void fsnotify_clear_marks_by_group(struct fsnotify_group *group) | ||
224 | { | ||
225 | struct fsnotify_mark_entry *lentry, *entry; | ||
226 | LIST_HEAD(free_list); | ||
227 | |||
228 | spin_lock(&group->mark_lock); | ||
229 | list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) { | ||
230 | list_add(&entry->free_g_list, &free_list); | ||
231 | list_del_init(&entry->g_list); | ||
232 | fsnotify_get_mark(entry); | ||
233 | } | ||
234 | spin_unlock(&group->mark_lock); | ||
235 | |||
236 | list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) { | ||
237 | fsnotify_destroy_mark_by_entry(entry); | ||
238 | fsnotify_put_mark(entry); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | /* | ||
243 | * Given an inode, destroy all of the marks associated with that inode. | ||
244 | */ | ||
245 | void fsnotify_clear_marks_by_inode(struct inode *inode) | ||
246 | { | ||
247 | struct fsnotify_mark_entry *entry, *lentry; | ||
248 | struct hlist_node *pos, *n; | ||
249 | LIST_HEAD(free_list); | ||
250 | |||
251 | spin_lock(&inode->i_lock); | ||
252 | hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) { | ||
253 | list_add(&entry->free_i_list, &free_list); | ||
254 | hlist_del_init(&entry->i_list); | ||
255 | fsnotify_get_mark(entry); | ||
256 | } | ||
257 | spin_unlock(&inode->i_lock); | ||
258 | |||
259 | list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) { | ||
260 | fsnotify_destroy_mark_by_entry(entry); | ||
261 | fsnotify_put_mark(entry); | ||
262 | } | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Given a group and inode, find the mark associated with that combination. | ||
267 | * If found, take a reference to that mark and return it; else return NULL. | ||
268 | */ | ||
269 | struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, | ||
270 | struct inode *inode) | ||
271 | { | ||
272 | struct fsnotify_mark_entry *entry; | ||
273 | struct hlist_node *pos; | ||
274 | |||
275 | assert_spin_locked(&inode->i_lock); | ||
276 | |||
277 | hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) { | ||
278 | if (entry->group == group) { | ||
279 | fsnotify_get_mark(entry); | ||
280 | return entry; | ||
281 | } | ||
282 | } | ||
283 | return NULL; | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * Nothing fancy, just initialize lists and locks and counters. | ||
288 | */ | ||
289 | void fsnotify_init_mark(struct fsnotify_mark_entry *entry, | ||
290 | void (*free_mark)(struct fsnotify_mark_entry *entry)) | ||
291 | |||
292 | { | ||
293 | spin_lock_init(&entry->lock); | ||
294 | atomic_set(&entry->refcnt, 1); | ||
295 | INIT_HLIST_NODE(&entry->i_list); | ||
296 | entry->group = NULL; | ||
297 | entry->mask = 0; | ||
298 | entry->inode = NULL; | ||
299 | entry->free_mark = free_mark; | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * Attach an initialized mark entry to a given group and inode. | ||
304 | * These marks may be used by the fsnotify backend to determine which | ||
305 | * event types should be delivered to which group and for which inodes. | ||
306 | */ | ||
307 | int fsnotify_add_mark(struct fsnotify_mark_entry *entry, | ||
308 | struct fsnotify_group *group, struct inode *inode) | ||
309 | { | ||
310 | struct fsnotify_mark_entry *lentry; | ||
311 | int ret = 0; | ||
312 | |||
313 | inode = igrab(inode); | ||
314 | if (unlikely(!inode)) | ||
315 | return -EINVAL; | ||
316 | |||
317 | /* | ||
318 | * LOCKING ORDER!!!! | ||
319 | * entry->lock | ||
320 | * group->mark_lock | ||
321 | * inode->i_lock | ||
322 | */ | ||
323 | spin_lock(&entry->lock); | ||
324 | spin_lock(&group->mark_lock); | ||
325 | spin_lock(&inode->i_lock); | ||
326 | |||
327 | entry->group = group; | ||
328 | entry->inode = inode; | ||
329 | |||
330 | lentry = fsnotify_find_mark_entry(group, inode); | ||
331 | if (!lentry) { | ||
332 | hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries); | ||
333 | list_add(&entry->g_list, &group->mark_entries); | ||
334 | |||
335 | fsnotify_get_mark(entry); /* for i_list and g_list */ | ||
336 | |||
337 | atomic_inc(&group->num_marks); | ||
338 | |||
339 | fsnotify_recalc_inode_mask_locked(inode); | ||
340 | } | ||
341 | |||
342 | spin_unlock(&inode->i_lock); | ||
343 | spin_unlock(&group->mark_lock); | ||
344 | spin_unlock(&entry->lock); | ||
345 | |||
346 | if (lentry) { | ||
347 | ret = -EEXIST; | ||
348 | iput(inode); | ||
349 | fsnotify_put_mark(lentry); | ||
350 | } else { | ||
351 | __fsnotify_update_child_dentry_flags(inode); | ||
352 | } | ||
353 | |||
354 | return ret; | ||
355 | } | ||
356 | |||
357 | /** | ||
358 | * fsnotify_unmount_inodes - an sb is unmounting; handle any watched inodes. | ||
359 | * @list: list of inodes being unmounted (sb->s_inodes) | ||
360 | * | ||
361 | * Called with inode_lock held, protecting the unmounting super block's list | ||
362 | * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. | ||
363 | * We temporarily drop inode_lock, however, and CAN block. | ||
364 | */ | ||
365 | void fsnotify_unmount_inodes(struct list_head *list) | ||
366 | { | ||
367 | struct inode *inode, *next_i, *need_iput = NULL; | ||
368 | |||
369 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { | ||
370 | struct inode *need_iput_tmp; | ||
371 | |||
372 | /* | ||
373 | * We cannot __iget() an inode in state I_CLEAR, I_FREEING, | ||
374 | * I_WILL_FREE, or I_NEW which is fine because by that point | ||
375 | * the inode cannot have any associated watches. | ||
376 | */ | ||
377 | if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) | ||
378 | continue; | ||
379 | |||
380 | /* | ||
381 | * If i_count is zero, the inode cannot have any watches and | ||
382 | * doing an __iget/iput with MS_ACTIVE clear would actually | ||
383 | * evict all inodes with zero i_count from icache which is | ||
384 | * unnecessarily violent and may in fact be illegal to do. | ||
385 | */ | ||
386 | if (!atomic_read(&inode->i_count)) | ||
387 | continue; | ||
388 | |||
389 | need_iput_tmp = need_iput; | ||
390 | need_iput = NULL; | ||
391 | |||
392 | /* In case fsnotify_inode_delete() drops a reference. */ | ||
393 | if (inode != need_iput_tmp) | ||
394 | __iget(inode); | ||
395 | else | ||
396 | need_iput_tmp = NULL; | ||
397 | |||
398 | /* In case the dropping of a reference would nuke next_i. */ | ||
399 | if ((&next_i->i_sb_list != list) && | ||
400 | atomic_read(&next_i->i_count) && | ||
401 | !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { | ||
402 | __iget(next_i); | ||
403 | need_iput = next_i; | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * We can safely drop inode_lock here because we hold | ||
408 | * references on both inode and next_i. Also no new inodes | ||
409 | * will be added since the umount has begun. Finally, | ||
410 | * iprune_mutex keeps shrink_icache_memory() away. | ||
411 | */ | ||
412 | spin_unlock(&inode_lock); | ||
413 | |||
414 | if (need_iput_tmp) | ||
415 | iput(need_iput_tmp); | ||
416 | |||
417 | /* for each watch, send FS_UNMOUNT and then remove it */ | ||
418 | fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); | ||
419 | |||
420 | fsnotify_inode_delete(inode); | ||
421 | |||
422 | iput(inode); | ||
423 | |||
424 | spin_lock(&inode_lock); | ||
425 | } | ||
426 | } | ||
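Both teardown paths above (fsnotify_clear_marks_by_group() and fsnotify_clear_marks_by_inode()) follow the scheme laid out in this file's opening comment: under the appropriate lock, unhook every mark onto a private list anchored on the caller's stack, then walk that private list with no locks held to do the per-mark destruction. A simplified userspace sketch of the same two-phase pattern, assuming a single pthread mutex and hypothetical mark/add_mark/clear_marks names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct mark {
	int id;
	struct mark *next;		/* plays the role of g_list/i_list */
	struct mark *free_next;		/* plays the role of free_g_list/free_i_list */
};

static struct mark *marks;
static pthread_mutex_t mark_lock = PTHREAD_MUTEX_INITIALIZER;

static void add_mark(int id)
{
	struct mark *m = malloc(sizeof(*m));

	if (!m)
		return;
	m->id = id;
	pthread_mutex_lock(&mark_lock);
	m->next = marks;
	marks = m;
	pthread_mutex_unlock(&mark_lock);
}

static void clear_marks(void)
{
	struct mark *free_list = NULL, *m, *n;

	/* phase 1: unhook everything onto a private list under the lock */
	pthread_mutex_lock(&mark_lock);
	for (m = marks; m; m = n) {
		n = m->next;
		m->free_next = free_list;
		free_list = m;
	}
	marks = NULL;			/* nothing can find the marks any more */
	pthread_mutex_unlock(&mark_lock);

	/* phase 2: the expensive per-mark work happens with no lock held */
	for (m = free_list; m; m = n) {
		n = m->free_next;
		printf("destroying mark %d\n", m->id);
		free(m);
	}
}

int main(void)
{
	add_mark(1);
	add_mark(2);
	clear_marks();
	return 0;
}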
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index 446792841023..5356884289a1 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig | |||
@@ -1,26 +1,30 @@ | |||
1 | config INOTIFY | 1 | config INOTIFY |
2 | bool "Inotify file change notification support" | 2 | bool "Inotify file change notification support" |
3 | default y | 3 | default n |
4 | ---help--- | 4 | ---help--- |
5 | Say Y here to enable inotify support. Inotify is a file change | 5 | Say Y here to enable legacy in-kernel inotify support. Inotify is a |
6 | notification system and a replacement for dnotify. Inotify fixes | 6 | file change notification system. It is a replacement for dnotify. |
7 | numerous shortcomings in dnotify and introduces several new features | 7 | This option only provides the legacy inotify in-kernel API. There |
8 | including multiple file events, one-shot support, and unmount | 8 | are no in-tree kernel users of this interface since it is deprecated. |
9 | notification. | 9 | You only need this if you are loading an out-of-tree kernel module |
10 | that uses inotify. | ||
10 | 11 | ||
11 | For more information, see <file:Documentation/filesystems/inotify.txt> | 12 | For more information, see <file:Documentation/filesystems/inotify.txt> |
12 | 13 | ||
13 | If unsure, say Y. | 14 | If unsure, say N. |
14 | 15 | ||
15 | config INOTIFY_USER | 16 | config INOTIFY_USER |
16 | bool "Inotify support for userspace" | 17 | bool "Inotify support for userspace" |
17 | depends on INOTIFY | 18 | depends on FSNOTIFY |
18 | default y | 19 | default y |
19 | ---help--- | 20 | ---help--- |
20 | Say Y here to enable inotify support for userspace, including the | 21 | Say Y here to enable inotify support for userspace, including the |
21 | associated system calls. Inotify allows monitoring of both files and | 22 | associated system calls. Inotify allows monitoring of both files and |
22 | directories via a single open fd. Events are read from the file | 23 | directories via a single open fd. Events are read from the file |
23 | descriptor, which is also select()- and poll()-able. | 24 | descriptor, which is also select()- and poll()-able. |
25 | Inotify fixes numerous shortcomings in dnotify and introduces several | ||
26 | new features including multiple file events, one-shot support, and | ||
27 | unmount notification. | ||
24 | 28 | ||
25 | For more information, see <file:Documentation/filesystems/inotify.txt> | 29 | For more information, see <file:Documentation/filesystems/inotify.txt> |
26 | 30 | ||
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile index e290f3bb9d8d..943828171362 100644 --- a/fs/notify/inotify/Makefile +++ b/fs/notify/inotify/Makefile | |||
@@ -1,2 +1,2 @@ | |||
1 | obj-$(CONFIG_INOTIFY) += inotify.o | 1 | obj-$(CONFIG_INOTIFY) += inotify.o |
2 | obj-$(CONFIG_INOTIFY_USER) += inotify_user.o | 2 | obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o |
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index 220c13f0d73d..40b1cf914ccb 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/list.h> | 32 | #include <linux/list.h> |
33 | #include <linux/writeback.h> | 33 | #include <linux/writeback.h> |
34 | #include <linux/inotify.h> | 34 | #include <linux/inotify.h> |
35 | #include <linux/fsnotify_backend.h> | ||
35 | 36 | ||
36 | static atomic_t inotify_cookie; | 37 | static atomic_t inotify_cookie; |
37 | 38 | ||
@@ -905,6 +906,25 @@ EXPORT_SYMBOL_GPL(inotify_rm_watch); | |||
905 | */ | 906 | */ |
906 | static int __init inotify_setup(void) | 907 | static int __init inotify_setup(void) |
907 | { | 908 | { |
909 | BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); | ||
910 | BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); | ||
911 | BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); | ||
912 | BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE); | ||
913 | BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); | ||
914 | BUILD_BUG_ON(IN_OPEN != FS_OPEN); | ||
915 | BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM); | ||
916 | BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO); | ||
917 | BUILD_BUG_ON(IN_CREATE != FS_CREATE); | ||
918 | BUILD_BUG_ON(IN_DELETE != FS_DELETE); | ||
919 | BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF); | ||
920 | BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF); | ||
921 | BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); | ||
922 | |||
923 | BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); | ||
924 | BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); | ||
925 | BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); | ||
926 | BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); | ||
927 | |||
908 | atomic_set(&inotify_cookie, 0); | 928 | atomic_set(&inotify_cookie, 0); |
909 | 929 | ||
910 | return 0; | 930 | return 0; |
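The BUILD_BUG_ON() block added to inotify_setup() turns the assumption that the userspace IN_* values are numerically identical to the internal FS_* values into a compile-time check, which is what allows inotify_arg_to_mask()/inotify_mask_to_arg() later in this patch to be simple bitwise operations. A userspace sketch of the same idiom using C11 _Static_assert and hypothetical constants:

#include <stdio.h>

/* hypothetical public ABI values and internal values that must stay in sync */
#define IN_ACCESS	0x00000001
#define FS_ACCESS	0x00000001
#define IN_MODIFY	0x00000002
#define FS_MODIFY	0x00000002

/* like BUILD_BUG_ON(): the build fails if the values ever diverge */
_Static_assert(IN_ACCESS == FS_ACCESS, "IN_ACCESS must equal FS_ACCESS");
_Static_assert(IN_MODIFY == FS_MODIFY, "IN_MODIFY must equal FS_MODIFY");

int main(void)
{
	puts("ABI constants verified at compile time");
	return 0;
}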
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h new file mode 100644 index 000000000000..ea2605a58b8a --- /dev/null +++ b/fs/notify/inotify/inotify.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #include <linux/fsnotify_backend.h> | ||
2 | #include <linux/inotify.h> | ||
3 | #include <linux/slab.h> /* struct kmem_cache */ | ||
4 | |||
5 | extern struct kmem_cache *event_priv_cachep; | ||
6 | |||
7 | struct inotify_event_private_data { | ||
8 | struct fsnotify_event_private_data fsnotify_event_priv_data; | ||
9 | int wd; | ||
10 | }; | ||
11 | |||
12 | struct inotify_inode_mark_entry { | ||
13 | /* fsnotify_mark_entry MUST be the first thing */ | ||
14 | struct fsnotify_mark_entry fsn_entry; | ||
15 | int wd; | ||
16 | }; | ||
17 | |||
18 | extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); | ||
19 | extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); | ||
20 | |||
21 | extern const struct fsnotify_ops inotify_fsnotify_ops; | ||
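The comment that fsnotify_mark_entry "MUST be the first thing" reflects how generic fsnotify code hands inotify a pointer to the embedded entry, from which inotify recovers its own inotify_inode_mark_entry with container_of() (as inotify_handle_event() does in the next file). A standalone sketch of that embedding pattern, with a userspace container_of and hypothetical structure names:

#include <stddef.h>
#include <stdio.h>

/* userspace equivalent of the kernel's container_of() */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct mark_entry {			/* stands in for fsnotify_mark_entry */
	unsigned int mask;
};

struct inotify_mark {			/* stands in for inotify_inode_mark_entry */
	struct mark_entry entry;	/* embedded generic part, first member */
	int wd;
};

static void generic_callback(struct mark_entry *entry)
{
	/* generic code sees only the embedded entry; recover the outer struct */
	struct inotify_mark *im = container_of(entry, struct inotify_mark, entry);

	printf("wd = %d\n", im->wd);
}

int main(void)
{
	struct inotify_mark im = { .entry = { .mask = 1 }, .wd = 42 };

	generic_callback(&im.entry);
	return 0;
}

Strictly speaking container_of() works for any member, not just the first; keeping the entry first additionally permits a plain cast in either direction, which appears to be the stronger guarantee the comment is asking for.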
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c new file mode 100644 index 000000000000..7ef75b83247e --- /dev/null +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -0,0 +1,138 @@ | |||
1 | /* | ||
2 | * fs/notify/inotify/inotify_fsnotify.c - fsnotify backend for inotify | ||
3 | * | ||
4 | * Authors: | ||
5 | * John McCutchan <ttb@tentacle.dhs.org> | ||
6 | * Robert Love <rml@novell.com> | ||
7 | * | ||
8 | * Copyright (C) 2005 John McCutchan | ||
9 | * Copyright 2006 Hewlett-Packard Development Company, L.P. | ||
10 | * | ||
11 | * Copyright (C) 2009 Eric Paris <Red Hat Inc> | ||
12 | * inotify was largely rewritten to make use of the fsnotify infrastructure | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the | ||
16 | * Free Software Foundation; either version 2, or (at your option) any | ||
17 | * later version. | ||
18 | * | ||
19 | * This program is distributed in the hope that it will be useful, but | ||
20 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
22 | * General Public License for more details. | ||
23 | */ | ||
24 | |||
25 | #include <linux/fs.h> /* struct inode */ | ||
26 | #include <linux/fsnotify_backend.h> | ||
27 | #include <linux/inotify.h> | ||
28 | #include <linux/path.h> /* struct path */ | ||
29 | #include <linux/slab.h> /* kmem_* */ | ||
30 | #include <linux/types.h> | ||
31 | |||
32 | #include "inotify.h" | ||
33 | |||
34 | static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event) | ||
35 | { | ||
36 | struct fsnotify_mark_entry *entry; | ||
37 | struct inotify_inode_mark_entry *ientry; | ||
38 | struct inode *to_tell; | ||
39 | struct inotify_event_private_data *event_priv; | ||
40 | struct fsnotify_event_private_data *fsn_event_priv; | ||
41 | int wd, ret; | ||
42 | |||
43 | to_tell = event->to_tell; | ||
44 | |||
45 | spin_lock(&to_tell->i_lock); | ||
46 | entry = fsnotify_find_mark_entry(group, to_tell); | ||
47 | spin_unlock(&to_tell->i_lock); | ||
48 | /* race with watch removal? We already passed should_send */ | ||
49 | if (unlikely(!entry)) | ||
50 | return 0; | ||
51 | ientry = container_of(entry, struct inotify_inode_mark_entry, | ||
52 | fsn_entry); | ||
53 | wd = ientry->wd; | ||
54 | |||
55 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); | ||
56 | if (unlikely(!event_priv)) | ||
57 | return -ENOMEM; | ||
58 | |||
59 | fsn_event_priv = &event_priv->fsnotify_event_priv_data; | ||
60 | |||
61 | fsn_event_priv->group = group; | ||
62 | event_priv->wd = wd; | ||
63 | |||
64 | ret = fsnotify_add_notify_event(group, event, fsn_event_priv); | ||
65 | /* EEXIST is not an error */ | ||
66 | if (ret == -EEXIST) | ||
67 | ret = 0; | ||
68 | |||
69 | /* did event_priv get attached? */ | ||
70 | if (list_empty(&fsn_event_priv->event_list)) | ||
71 | inotify_free_event_priv(fsn_event_priv); | ||
72 | |||
73 | /* | ||
74 | * If we hold the entry until after the event is on the queue, | ||
75 | * IN_IGNORED won't be able to pass this event in the queue. | ||
76 | */ | ||
77 | fsnotify_put_mark(entry); | ||
78 | |||
79 | return ret; | ||
80 | } | ||
81 | |||
82 | static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) | ||
83 | { | ||
84 | inotify_destroy_mark_entry(entry, group); | ||
85 | } | ||
86 | |||
87 | static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) | ||
88 | { | ||
89 | struct fsnotify_mark_entry *entry; | ||
90 | bool send; | ||
91 | |||
92 | spin_lock(&inode->i_lock); | ||
93 | entry = fsnotify_find_mark_entry(group, inode); | ||
94 | spin_unlock(&inode->i_lock); | ||
95 | if (!entry) | ||
96 | return false; | ||
97 | |||
98 | mask = (mask & ~FS_EVENT_ON_CHILD); | ||
99 | send = (entry->mask & mask); | ||
100 | |||
101 | /* find took a reference */ | ||
102 | fsnotify_put_mark(entry); | ||
103 | |||
104 | return send; | ||
105 | } | ||
106 | |||
107 | static int idr_callback(int id, void *p, void *data) | ||
108 | { | ||
109 | BUG(); | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static void inotify_free_group_priv(struct fsnotify_group *group) | ||
114 | { | ||
115 | /* ideally the idr is empty and we won't hit the BUG in the callback */ | ||
116 | idr_for_each(&group->inotify_data.idr, idr_callback, NULL); | ||
117 | idr_remove_all(&group->inotify_data.idr); | ||
118 | idr_destroy(&group->inotify_data.idr); | ||
119 | } | ||
120 | |||
121 | void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) | ||
122 | { | ||
123 | struct inotify_event_private_data *event_priv; | ||
124 | |||
125 | |||
126 | event_priv = container_of(fsn_event_priv, struct inotify_event_private_data, | ||
127 | fsnotify_event_priv_data); | ||
128 | |||
129 | kmem_cache_free(event_priv_cachep, event_priv); | ||
130 | } | ||
131 | |||
132 | const struct fsnotify_ops inotify_fsnotify_ops = { | ||
133 | .handle_event = inotify_handle_event, | ||
134 | .should_send_event = inotify_should_send_event, | ||
135 | .free_group_priv = inotify_free_group_priv, | ||
136 | .free_event_priv = inotify_free_event_priv, | ||
137 | .freeing_mark = inotify_freeing_mark, | ||
138 | }; | ||
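inotify_fsnotify_ops at the end of this file is the entire interface the generic layer sees: fsnotify core code calls should_send_event() and handle_event() only through this table, so each backend supplies its own behavior without the core knowing which one it is talking to. A minimal userspace sketch of that ops-table dispatch, with hypothetical names throughout:

#include <stdbool.h>
#include <stdio.h>

struct event {
	unsigned int mask;
};

/* per-backend operations, like struct fsnotify_ops */
struct notify_ops {
	bool (*should_send_event)(unsigned int mask);
	int (*handle_event)(struct event *event);
};

static bool demo_should_send(unsigned int mask)
{
	return mask != 0;
}

static int demo_handle(struct event *event)
{
	printf("handling event mask 0x%x\n", event->mask);
	return 0;
}

static const struct notify_ops demo_ops = {
	.should_send_event	= demo_should_send,
	.handle_event		= demo_handle,
};

/* the generic core dispatches through the table, never to a backend directly */
static void deliver(const struct notify_ops *ops, struct event *event)
{
	if (ops->should_send_event(event->mask))
		ops->handle_event(event);
}

int main(void)
{
	struct event ev = { .mask = 0x2 };

	deliver(&demo_ops, &ev);
	return 0;
}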
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 1634319e2404..982a412ac5bc 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -8,6 +8,9 @@ | |||
8 | * Copyright (C) 2005 John McCutchan | 8 | * Copyright (C) 2005 John McCutchan |
9 | * Copyright 2006 Hewlett-Packard Development Company, L.P. | 9 | * Copyright 2006 Hewlett-Packard Development Company, L.P. |
10 | * | 10 | * |
11 | * Copyright (C) 2009 Eric Paris <Red Hat Inc> | ||
12 | * inotify was largely rewritten to make use of the fsnotify infrastructure | ||
13 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | 14 | * This program is free software; you can redistribute it and/or modify it |
12 | * under the terms of the GNU General Public License as published by the | 15 | * under the terms of the GNU General Public License as published by the |
13 | * Free Software Foundation; either version 2, or (at your option) any | 16 | * Free Software Foundation; either version 2, or (at your option) any |
@@ -19,94 +22,48 @@ | |||
19 | * General Public License for more details. | 22 | * General Public License for more details. |
20 | */ | 23 | */ |
21 | 24 | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/fs.h> | ||
26 | #include <linux/file.h> | 25 | #include <linux/file.h> |
27 | #include <linux/mount.h> | 26 | #include <linux/fs.h> /* struct inode */ |
28 | #include <linux/namei.h> | 27 | #include <linux/fsnotify_backend.h> |
29 | #include <linux/poll.h> | 28 | #include <linux/idr.h> |
30 | #include <linux/init.h> | 29 | #include <linux/init.h> /* module_init */ |
31 | #include <linux/list.h> | ||
32 | #include <linux/inotify.h> | 30 | #include <linux/inotify.h> |
31 | #include <linux/kernel.h> /* roundup() */ | ||
32 | #include <linux/magic.h> /* superblock magic number */ | ||
33 | #include <linux/mount.h> /* mntget */ | ||
34 | #include <linux/namei.h> /* LOOKUP_FOLLOW */ | ||
35 | #include <linux/path.h> /* struct path */ | ||
36 | #include <linux/sched.h> /* struct user */ | ||
37 | #include <linux/slab.h> /* struct kmem_cache */ | ||
33 | #include <linux/syscalls.h> | 38 | #include <linux/syscalls.h> |
34 | #include <linux/magic.h> | 39 | #include <linux/types.h> |
40 | #include <linux/uaccess.h> | ||
41 | #include <linux/poll.h> | ||
42 | #include <linux/wait.h> | ||
35 | 43 | ||
36 | #include <asm/ioctls.h> | 44 | #include "inotify.h" |
37 | 45 | ||
38 | static struct kmem_cache *watch_cachep __read_mostly; | 46 | #include <asm/ioctls.h> |
39 | static struct kmem_cache *event_cachep __read_mostly; | ||
40 | 47 | ||
41 | static struct vfsmount *inotify_mnt __read_mostly; | 48 | static struct vfsmount *inotify_mnt __read_mostly; |
42 | 49 | ||
50 | /* this just sits here and wastes global memory. Used only to pad userspace messages with zeros */ | ||
51 | static struct inotify_event nul_inotify_event; | ||
52 | |||
43 | /* these are configurable via /proc/sys/fs/inotify/ */ | 53 | /* these are configurable via /proc/sys/fs/inotify/ */ |
44 | static int inotify_max_user_instances __read_mostly; | 54 | static int inotify_max_user_instances __read_mostly; |
45 | static int inotify_max_user_watches __read_mostly; | ||
46 | static int inotify_max_queued_events __read_mostly; | 55 | static int inotify_max_queued_events __read_mostly; |
56 | int inotify_max_user_watches __read_mostly; | ||
47 | 57 | ||
48 | /* | 58 | static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; |
49 | * Lock ordering: | 59 | struct kmem_cache *event_priv_cachep __read_mostly; |
50 | * | 60 | static struct fsnotify_event *inotify_ignored_event; |
51 | * inotify_dev->up_mutex (ensures we don't re-add the same watch) | ||
52 | * inode->inotify_mutex (protects inode's watch list) | ||
53 | * inotify_handle->mutex (protects inotify_handle's watch list) | ||
54 | * inotify_dev->ev_mutex (protects device's event queue) | ||
55 | */ | ||
56 | 61 | ||
57 | /* | 62 | /* |
58 | * Lifetimes of the main data structures: | 63 | * When inotify registers a new group it increments this and uses that |
59 | * | 64 | * value as an offset to set the fsnotify group "name" and priority. |
60 | * inotify_device: Lifetime is managed by reference count, from | ||
61 | * sys_inotify_init() until release. Additional references can bump the count | ||
62 | * via get_inotify_dev() and drop the count via put_inotify_dev(). | ||
63 | * | ||
64 | * inotify_user_watch: Lifetime is from create_watch() to the receipt of an | ||
65 | * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the | ||
66 | * first event, or to inotify_destroy(). | ||
67 | */ | 65 | */ |
68 | 66 | static atomic_t inotify_grp_num; | |
69 | /* | ||
70 | * struct inotify_device - represents an inotify instance | ||
71 | * | ||
72 | * This structure is protected by the mutex 'mutex'. | ||
73 | */ | ||
74 | struct inotify_device { | ||
75 | wait_queue_head_t wq; /* wait queue for i/o */ | ||
76 | struct mutex ev_mutex; /* protects event queue */ | ||
77 | struct mutex up_mutex; /* synchronizes watch updates */ | ||
78 | struct list_head events; /* list of queued events */ | ||
79 | struct user_struct *user; /* user who opened this dev */ | ||
80 | struct inotify_handle *ih; /* inotify handle */ | ||
81 | struct fasync_struct *fa; /* async notification */ | ||
82 | atomic_t count; /* reference count */ | ||
83 | unsigned int queue_size; /* size of the queue (bytes) */ | ||
84 | unsigned int event_count; /* number of pending events */ | ||
85 | unsigned int max_events; /* maximum number of events */ | ||
86 | }; | ||
87 | |||
88 | /* | ||
89 | * struct inotify_kernel_event - An inotify event, originating from a watch and | ||
90 | * queued for user-space. A list of these is attached to each instance of the | ||
91 | * device. In read(), this list is walked and all events that can fit in the | ||
92 | * buffer are returned. | ||
93 | * | ||
94 | * Protected by dev->ev_mutex of the device in which we are queued. | ||
95 | */ | ||
96 | struct inotify_kernel_event { | ||
97 | struct inotify_event event; /* the user-space event */ | ||
98 | struct list_head list; /* entry in inotify_device's list */ | ||
99 | char *name; /* filename, if any */ | ||
100 | }; | ||
101 | |||
102 | /* | ||
103 | * struct inotify_user_watch - our version of an inotify_watch, we add | ||
104 | * a reference to the associated inotify_device. | ||
105 | */ | ||
106 | struct inotify_user_watch { | ||
107 | struct inotify_device *dev; /* associated device */ | ||
108 | struct inotify_watch wdata; /* inotify watch data */ | ||
109 | }; | ||
110 | 67 | ||
111 | #ifdef CONFIG_SYSCTL | 68 | #ifdef CONFIG_SYSCTL |
112 | 69 | ||
@@ -149,280 +106,36 @@ ctl_table inotify_table[] = { | |||
149 | }; | 106 | }; |
150 | #endif /* CONFIG_SYSCTL */ | 107 | #endif /* CONFIG_SYSCTL */ |
151 | 108 | ||
152 | static inline void get_inotify_dev(struct inotify_device *dev) | 109 | static inline __u32 inotify_arg_to_mask(u32 arg) |
153 | { | ||
154 | atomic_inc(&dev->count); | ||
155 | } | ||
156 | |||
157 | static inline void put_inotify_dev(struct inotify_device *dev) | ||
158 | { | ||
159 | if (atomic_dec_and_test(&dev->count)) { | ||
160 | atomic_dec(&dev->user->inotify_devs); | ||
161 | free_uid(dev->user); | ||
162 | kfree(dev); | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * free_inotify_user_watch - cleans up the watch and its references | ||
168 | */ | ||
169 | static void free_inotify_user_watch(struct inotify_watch *w) | ||
170 | { | ||
171 | struct inotify_user_watch *watch; | ||
172 | struct inotify_device *dev; | ||
173 | |||
174 | watch = container_of(w, struct inotify_user_watch, wdata); | ||
175 | dev = watch->dev; | ||
176 | |||
177 | atomic_dec(&dev->user->inotify_watches); | ||
178 | put_inotify_dev(dev); | ||
179 | kmem_cache_free(watch_cachep, watch); | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * kernel_event - create a new kernel event with the given parameters | ||
184 | * | ||
185 | * This function can sleep. | ||
186 | */ | ||
187 | static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, | ||
188 | const char *name) | ||
189 | { | ||
190 | struct inotify_kernel_event *kevent; | ||
191 | |||
192 | kevent = kmem_cache_alloc(event_cachep, GFP_NOFS); | ||
193 | if (unlikely(!kevent)) | ||
194 | return NULL; | ||
195 | |||
196 | /* we hand this out to user-space, so zero it just in case */ | ||
197 | memset(&kevent->event, 0, sizeof(struct inotify_event)); | ||
198 | |||
199 | kevent->event.wd = wd; | ||
200 | kevent->event.mask = mask; | ||
201 | kevent->event.cookie = cookie; | ||
202 | |||
203 | INIT_LIST_HEAD(&kevent->list); | ||
204 | |||
205 | if (name) { | ||
206 | size_t len, rem, event_size = sizeof(struct inotify_event); | ||
207 | |||
208 | /* | ||
209 | * We need to pad the filename so as to properly align an | ||
210 | * array of inotify_event structures. Because the structure is | ||
211 | * small and the common case is a small filename, we just round | ||
212 | * up to the next multiple of the structure's sizeof. This is | ||
213 | * simple and safe for all architectures. | ||
214 | */ | ||
215 | len = strlen(name) + 1; | ||
216 | rem = event_size - len; | ||
217 | if (len > event_size) { | ||
218 | rem = event_size - (len % event_size); | ||
219 | if (len % event_size == 0) | ||
220 | rem = 0; | ||
221 | } | ||
222 | |||
223 | kevent->name = kmalloc(len + rem, GFP_NOFS); | ||
224 | if (unlikely(!kevent->name)) { | ||
225 | kmem_cache_free(event_cachep, kevent); | ||
226 | return NULL; | ||
227 | } | ||
228 | memcpy(kevent->name, name, len); | ||
229 | if (rem) | ||
230 | memset(kevent->name + len, 0, rem); | ||
231 | kevent->event.len = len + rem; | ||
232 | } else { | ||
233 | kevent->event.len = 0; | ||
234 | kevent->name = NULL; | ||
235 | } | ||
236 | |||
237 | return kevent; | ||
238 | } | ||
239 | |||
240 | /* | ||
241 | * inotify_dev_get_event - return the next event in the given dev's queue | ||
242 | * | ||
243 | * Caller must hold dev->ev_mutex. | ||
244 | */ | ||
245 | static inline struct inotify_kernel_event * | ||
246 | inotify_dev_get_event(struct inotify_device *dev) | ||
247 | { | ||
248 | return list_entry(dev->events.next, struct inotify_kernel_event, list); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * inotify_dev_get_last_event - return the last event in the given dev's queue | ||
253 | * | ||
254 | * Caller must hold dev->ev_mutex. | ||
255 | */ | ||
256 | static inline struct inotify_kernel_event * | ||
257 | inotify_dev_get_last_event(struct inotify_device *dev) | ||
258 | { | 110 | { |
259 | if (list_empty(&dev->events)) | 111 | __u32 mask; |
260 | return NULL; | ||
261 | return list_entry(dev->events.prev, struct inotify_kernel_event, list); | ||
262 | } | ||
263 | 112 | ||
264 | /* | 113 | /* everything should accept their own ignored and cares about children */ |
265 | * inotify_dev_queue_event - event handler registered with core inotify, adds | 114 | mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD); |
266 | * a new event to the given device | ||
267 | * | ||
268 | * Can sleep (calls kernel_event()). | ||
269 | */ | ||
270 | static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, | ||
271 | u32 cookie, const char *name, | ||
272 | struct inode *ignored) | ||
273 | { | ||
274 | struct inotify_user_watch *watch; | ||
275 | struct inotify_device *dev; | ||
276 | struct inotify_kernel_event *kevent, *last; | ||
277 | 115 | ||
278 | watch = container_of(w, struct inotify_user_watch, wdata); | 116 | /* mask off the flags used to open the fd */ |
279 | dev = watch->dev; | 117 | mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT)); |
280 | 118 | ||
281 | mutex_lock(&dev->ev_mutex); | 119 | return mask; |
282 | |||
283 | /* we can safely put the watch as we don't reference it while | ||
284 | * generating the event | ||
285 | */ | ||
286 | if (mask & IN_IGNORED || w->mask & IN_ONESHOT) | ||
287 | put_inotify_watch(w); /* final put */ | ||
288 | |||
289 | /* coalescing: drop this event if it is a dupe of the previous */ | ||
290 | last = inotify_dev_get_last_event(dev); | ||
291 | if (last && last->event.mask == mask && last->event.wd == wd && | ||
292 | last->event.cookie == cookie) { | ||
293 | const char *lastname = last->name; | ||
294 | |||
295 | if (!name && !lastname) | ||
296 | goto out; | ||
297 | if (name && lastname && !strcmp(lastname, name)) | ||
298 | goto out; | ||
299 | } | ||
300 | |||
301 | /* the queue overflowed and we already sent the Q_OVERFLOW event */ | ||
302 | if (unlikely(dev->event_count > dev->max_events)) | ||
303 | goto out; | ||
304 | |||
305 | /* if the queue overflows, we need to notify user space */ | ||
306 | if (unlikely(dev->event_count == dev->max_events)) | ||
307 | kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL); | ||
308 | else | ||
309 | kevent = kernel_event(wd, mask, cookie, name); | ||
310 | |||
311 | if (unlikely(!kevent)) | ||
312 | goto out; | ||
313 | |||
314 | /* queue the event and wake up anyone waiting */ | ||
315 | dev->event_count++; | ||
316 | dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; | ||
317 | list_add_tail(&kevent->list, &dev->events); | ||
318 | wake_up_interruptible(&dev->wq); | ||
319 | kill_fasync(&dev->fa, SIGIO, POLL_IN); | ||
320 | |||
321 | out: | ||
322 | mutex_unlock(&dev->ev_mutex); | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * remove_kevent - cleans up the given kevent | ||
327 | * | ||
328 | * Caller must hold dev->ev_mutex. | ||
329 | */ | ||
330 | static void remove_kevent(struct inotify_device *dev, | ||
331 | struct inotify_kernel_event *kevent) | ||
332 | { | ||
333 | list_del(&kevent->list); | ||
334 | |||
335 | dev->event_count--; | ||
336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * free_kevent - frees the given kevent. | ||
341 | */ | ||
342 | static void free_kevent(struct inotify_kernel_event *kevent) | ||
343 | { | ||
344 | kfree(kevent->name); | ||
345 | kmem_cache_free(event_cachep, kevent); | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * inotify_dev_event_dequeue - destroy an event on the given device | ||
350 | * | ||
351 | * Caller must hold dev->ev_mutex. | ||
352 | */ | ||
353 | static void inotify_dev_event_dequeue(struct inotify_device *dev) | ||
354 | { | ||
355 | if (!list_empty(&dev->events)) { | ||
356 | struct inotify_kernel_event *kevent; | ||
357 | kevent = inotify_dev_get_event(dev); | ||
358 | remove_kevent(dev, kevent); | ||
359 | free_kevent(kevent); | ||
360 | } | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * find_inode - resolve a user-given path to a specific inode | ||
365 | */ | ||
366 | static int find_inode(const char __user *dirname, struct path *path, | ||
367 | unsigned flags) | ||
368 | { | ||
369 | int error; | ||
370 | |||
371 | error = user_path_at(AT_FDCWD, dirname, flags, path); | ||
372 | if (error) | ||
373 | return error; | ||
374 | /* you can only watch an inode if you have read permissions on it */ | ||
375 | error = inode_permission(path->dentry->d_inode, MAY_READ); | ||
376 | if (error) | ||
377 | path_put(path); | ||
378 | return error; | ||
379 | } | 120 | } |
380 | 121 | ||
381 | /* | 122 | static inline u32 inotify_mask_to_arg(__u32 mask) |
382 | * create_watch - creates a watch on the given device. | ||
383 | * | ||
384 | * Callers must hold dev->up_mutex. | ||
385 | */ | ||
386 | static int create_watch(struct inotify_device *dev, struct inode *inode, | ||
387 | u32 mask) | ||
388 | { | 123 | { |
389 | struct inotify_user_watch *watch; | 124 | return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | |
390 | int ret; | 125 | IN_Q_OVERFLOW); |
391 | |||
392 | if (atomic_read(&dev->user->inotify_watches) >= | ||
393 | inotify_max_user_watches) | ||
394 | return -ENOSPC; | ||
395 | |||
396 | watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL); | ||
397 | if (unlikely(!watch)) | ||
398 | return -ENOMEM; | ||
399 | |||
400 | /* save a reference to device and bump the count to make it official */ | ||
401 | get_inotify_dev(dev); | ||
402 | watch->dev = dev; | ||
403 | |||
404 | atomic_inc(&dev->user->inotify_watches); | ||
405 | |||
406 | inotify_init_watch(&watch->wdata); | ||
407 | ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask); | ||
408 | if (ret < 0) | ||
409 | free_inotify_user_watch(&watch->wdata); | ||
410 | |||
411 | return ret; | ||
412 | } | 126 | } |
413 | 127 | ||
414 | /* Device Interface */ | 128 | /* inotify userspace file descriptor functions */ |
415 | |||
416 | static unsigned int inotify_poll(struct file *file, poll_table *wait) | 129 | static unsigned int inotify_poll(struct file *file, poll_table *wait) |
417 | { | 130 | { |
418 | struct inotify_device *dev = file->private_data; | 131 | struct fsnotify_group *group = file->private_data; |
419 | int ret = 0; | 132 | int ret = 0; |
420 | 133 | ||
421 | poll_wait(file, &dev->wq, wait); | 134 | poll_wait(file, &group->notification_waitq, wait); |
422 | mutex_lock(&dev->ev_mutex); | 135 | mutex_lock(&group->notification_mutex); |
423 | if (!list_empty(&dev->events)) | 136 | if (!fsnotify_notify_queue_is_empty(group)) |
424 | ret = POLLIN | POLLRDNORM; | 137 | ret = POLLIN | POLLRDNORM; |
425 | mutex_unlock(&dev->ev_mutex); | 138 | mutex_unlock(&group->notification_mutex); |
426 | 139 | ||
427 | return ret; | 140 | return ret; |
428 | } | 141 | } |
@@ -432,26 +145,29 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait) | |||
432 | * enough to fit in "count". Return an error pointer if | 145 | * enough to fit in "count". Return an error pointer if |
433 | * not large enough. | 146 | * not large enough. |
434 | * | 147 | * |
435 | * Called with the device ev_mutex held. | 148 | * Called with the group->notification_mutex held. |
436 | */ | 149 | */ |
437 | static struct inotify_kernel_event *get_one_event(struct inotify_device *dev, | 150 | static struct fsnotify_event *get_one_event(struct fsnotify_group *group, |
438 | size_t count) | 151 | size_t count) |
439 | { | 152 | { |
440 | size_t event_size = sizeof(struct inotify_event); | 153 | size_t event_size = sizeof(struct inotify_event); |
441 | struct inotify_kernel_event *kevent; | 154 | struct fsnotify_event *event; |
442 | 155 | ||
443 | if (list_empty(&dev->events)) | 156 | if (fsnotify_notify_queue_is_empty(group)) |
444 | return NULL; | 157 | return NULL; |
445 | 158 | ||
446 | kevent = inotify_dev_get_event(dev); | 159 | event = fsnotify_peek_notify_event(group); |
447 | if (kevent->name) | 160 | |
448 | event_size += kevent->event.len; | 161 | event_size += roundup(event->name_len, event_size); |
449 | 162 | ||
450 | if (event_size > count) | 163 | if (event_size > count) |
451 | return ERR_PTR(-EINVAL); | 164 | return ERR_PTR(-EINVAL); |
452 | 165 | ||
453 | remove_kevent(dev, kevent); | 166 | /* held the notification_mutex the whole time, so this is the |
454 | return kevent; | 167 | * same event we peeked above */ |
168 | fsnotify_remove_notify_event(group); | ||
169 | |||
170 | return event; | ||
455 | } | 171 | } |
456 | 172 | ||
457 | /* | 173 | /* |
@@ -460,51 +176,90 @@ static struct inotify_kernel_event *get_one_event(struct inotify_device *dev, | |||
460 | * We already checked that the event size is smaller than the | 176 | * We already checked that the event size is smaller than the |
461 | * buffer we had in "get_one_event()" above. | 177 | * buffer we had in "get_one_event()" above. |
462 | */ | 178 | */ |
463 | static ssize_t copy_event_to_user(struct inotify_kernel_event *kevent, | 179 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
180 | struct fsnotify_event *event, | ||
464 | char __user *buf) | 181 | char __user *buf) |
465 | { | 182 | { |
183 | struct inotify_event inotify_event; | ||
184 | struct fsnotify_event_private_data *fsn_priv; | ||
185 | struct inotify_event_private_data *priv; | ||
466 | size_t event_size = sizeof(struct inotify_event); | 186 | size_t event_size = sizeof(struct inotify_event); |
187 | size_t name_len; | ||
188 | |||
189 | /* we get the inotify watch descriptor from the event private data */ | ||
190 | spin_lock(&event->lock); | ||
191 | fsn_priv = fsnotify_remove_priv_from_event(group, event); | ||
192 | spin_unlock(&event->lock); | ||
193 | |||
194 | if (!fsn_priv) | ||
195 | inotify_event.wd = -1; | ||
196 | else { | ||
197 | priv = container_of(fsn_priv, struct inotify_event_private_data, | ||
198 | fsnotify_event_priv_data); | ||
199 | inotify_event.wd = priv->wd; | ||
200 | inotify_free_event_priv(fsn_priv); | ||
201 | } | ||
202 | |||
203 | /* round up event->name_len so it is a multiple of event_size */ | ||
204 | name_len = roundup(event->name_len, event_size); | ||
205 | inotify_event.len = name_len; | ||
206 | |||
207 | inotify_event.mask = inotify_mask_to_arg(event->mask); | ||
208 | inotify_event.cookie = event->sync_cookie; | ||
467 | 209 | ||
468 | if (copy_to_user(buf, &kevent->event, event_size)) | 210 | /* send the main event */ |
211 | if (copy_to_user(buf, &inotify_event, event_size)) | ||
469 | return -EFAULT; | 212 | return -EFAULT; |
470 | 213 | ||
471 | if (kevent->name) { | 214 | buf += event_size; |
472 | buf += event_size; | ||
473 | 215 | ||
474 | if (copy_to_user(buf, kevent->name, kevent->event.len)) | 216 | /* |
217 | * fsnotify only stores the pathname, so here we have to send the pathname | ||
218 | * and then pad that pathname out to a multiple of sizeof(inotify_event) | ||
219 | * with zeros. I get my zeros from the nul_inotify_event. | ||
220 | */ | ||
221 | if (name_len) { | ||
222 | unsigned int len_to_zero = name_len - event->name_len; | ||
223 | /* copy the path name */ | ||
224 | if (copy_to_user(buf, event->file_name, event->name_len)) | ||
475 | return -EFAULT; | 225 | return -EFAULT; |
226 | buf += event->name_len; | ||
476 | 227 | ||
477 | event_size += kevent->event.len; | 228 | /* fill userspace with 0's from nul_inotify_event */ |
229 | if (copy_to_user(buf, &nul_inotify_event, len_to_zero)) | ||
230 | return -EFAULT; | ||
231 | buf += len_to_zero; | ||
232 | event_size += name_len; | ||
478 | } | 233 | } |
234 | |||
479 | return event_size; | 235 | return event_size; |
480 | } | 236 | } |
481 | 237 | ||
482 | static ssize_t inotify_read(struct file *file, char __user *buf, | 238 | static ssize_t inotify_read(struct file *file, char __user *buf, |
483 | size_t count, loff_t *pos) | 239 | size_t count, loff_t *pos) |
484 | { | 240 | { |
485 | struct inotify_device *dev; | 241 | struct fsnotify_group *group; |
242 | struct fsnotify_event *kevent; | ||
486 | char __user *start; | 243 | char __user *start; |
487 | int ret; | 244 | int ret; |
488 | DEFINE_WAIT(wait); | 245 | DEFINE_WAIT(wait); |
489 | 246 | ||
490 | start = buf; | 247 | start = buf; |
491 | dev = file->private_data; | 248 | group = file->private_data; |
492 | 249 | ||
493 | while (1) { | 250 | while (1) { |
494 | struct inotify_kernel_event *kevent; | 251 | prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); |
495 | 252 | ||
496 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); | 253 | mutex_lock(&group->notification_mutex); |
497 | 254 | kevent = get_one_event(group, count); | |
498 | mutex_lock(&dev->ev_mutex); | 255 | mutex_unlock(&group->notification_mutex); |
499 | kevent = get_one_event(dev, count); | ||
500 | mutex_unlock(&dev->ev_mutex); | ||
501 | 256 | ||
502 | if (kevent) { | 257 | if (kevent) { |
503 | ret = PTR_ERR(kevent); | 258 | ret = PTR_ERR(kevent); |
504 | if (IS_ERR(kevent)) | 259 | if (IS_ERR(kevent)) |
505 | break; | 260 | break; |
506 | ret = copy_event_to_user(kevent, buf); | 261 | ret = copy_event_to_user(group, kevent, buf); |
507 | free_kevent(kevent); | 262 | fsnotify_put_event(kevent); |
508 | if (ret < 0) | 263 | if (ret < 0) |
509 | break; | 264 | break; |
510 | buf += ret; | 265 | buf += ret; |
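On the userspace side of this ABI, each read() returns a stream of struct inotify_event headers, each followed by len bytes of NUL-padded file name, where len is the rounded-up value copy_event_to_user() computes above (zero for events with no name). A self-contained consumer built on the standard inotify syscalls, assuming a watchable /tmp directory:

#include <stdio.h>
#include <sys/inotify.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	char *p;
	int fd = inotify_init();

	if (fd < 0 || inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE) < 0) {
		perror("inotify");
		return 1;
	}

	n = read(fd, buf, sizeof(buf));	/* blocks until at least one event is queued */
	if (n <= 0)
		return 1;

	for (p = buf; p < buf + n; ) {
		struct inotify_event *ev = (struct inotify_event *)p;

		/* ev->len is the padded name length; the padding bytes are zeros */
		printf("wd=%d mask=0x%x name=%s\n", ev->wd, (unsigned)ev->mask,
		       ev->len ? ev->name : "");
		p += sizeof(*ev) + ev->len;	/* step over the header and padded name */
	}
	close(fd);
	return 0;
}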
@@ -525,7 +280,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
525 | schedule(); | 280 | schedule(); |
526 | } | 281 | } |
527 | 282 | ||
528 | finish_wait(&dev->wq, &wait); | 283 | finish_wait(&group->notification_waitq, &wait); |
529 | if (start != buf && ret != -EFAULT) | 284 | if (start != buf && ret != -EFAULT) |
530 | ret = buf - start; | 285 | ret = buf - start; |
531 | return ret; | 286 | return ret; |
@@ -533,25 +288,19 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
533 | 288 | ||
534 | static int inotify_fasync(int fd, struct file *file, int on) | 289 | static int inotify_fasync(int fd, struct file *file, int on) |
535 | { | 290 | { |
536 | struct inotify_device *dev = file->private_data; | 291 | struct fsnotify_group *group = file->private_data; |
537 | 292 | ||
538 | return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO; | 293 | return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO; |
539 | } | 294 | } |
540 | 295 | ||
541 | static int inotify_release(struct inode *ignored, struct file *file) | 296 | static int inotify_release(struct inode *ignored, struct file *file) |
542 | { | 297 | { |
543 | struct inotify_device *dev = file->private_data; | 298 | struct fsnotify_group *group = file->private_data; |
544 | |||
545 | inotify_destroy(dev->ih); | ||
546 | 299 | ||
547 | /* destroy all of the events on this device */ | 300 | fsnotify_clear_marks_by_group(group); |
548 | mutex_lock(&dev->ev_mutex); | ||
549 | while (!list_empty(&dev->events)) | ||
550 | inotify_dev_event_dequeue(dev); | ||
551 | mutex_unlock(&dev->ev_mutex); | ||
552 | 301 | ||
553 | /* free this device: the put matching the get in inotify_init() */ | 302 | /* free this group, matching get was inotify_init->fsnotify_obtain_group */ |
554 | put_inotify_dev(dev); | 303 | fsnotify_put_group(group); |
555 | 304 | ||
556 | return 0; | 305 | return 0; |
557 | } | 306 | } |
@@ -559,16 +308,27 @@ static int inotify_release(struct inode *ignored, struct file *file) | |||
559 | static long inotify_ioctl(struct file *file, unsigned int cmd, | 308 | static long inotify_ioctl(struct file *file, unsigned int cmd, |
560 | unsigned long arg) | 309 | unsigned long arg) |
561 | { | 310 | { |
562 | struct inotify_device *dev; | 311 | struct fsnotify_group *group; |
312 | struct fsnotify_event_holder *holder; | ||
313 | struct fsnotify_event *event; | ||
563 | void __user *p; | 314 | void __user *p; |
564 | int ret = -ENOTTY; | 315 | int ret = -ENOTTY; |
316 | size_t send_len = 0; | ||
565 | 317 | ||
566 | dev = file->private_data; | 318 | group = file->private_data; |
567 | p = (void __user *) arg; | 319 | p = (void __user *) arg; |
568 | 320 | ||
569 | switch (cmd) { | 321 | switch (cmd) { |
570 | case FIONREAD: | 322 | case FIONREAD: |
571 | ret = put_user(dev->queue_size, (int __user *) p); | 323 | mutex_lock(&group->notification_mutex); |
324 | list_for_each_entry(holder, &group->notification_list, event_list) { | ||
325 | event = holder->event; | ||
326 | send_len += sizeof(struct inotify_event); | ||
327 | send_len += roundup(event->name_len, | ||
328 | sizeof(struct inotify_event)); | ||
329 | } | ||
330 | mutex_unlock(&group->notification_mutex); | ||
331 | ret = put_user(send_len, (int __user *) p); | ||
572 | break; | 332 | break; |
573 | } | 333 | } |
574 | 334 | ||
@@ -576,23 +336,233 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, | |||
576 | } | 336 | } |
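
Since FIONREAD now reports the exact byte count a read() could return (one header plus the rounded-up name length per queued event), userspace can size a drain buffer precisely. A hedged sketch:

	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/ioctl.h>

	/* drain everything currently queued on an inotify fd; caller frees */
	static char *read_all_pending(int fd, ssize_t *len)
	{
		int pending = 0;
		char *buf;

		if (ioctl(fd, FIONREAD, &pending) < 0 || pending <= 0)
			return NULL;
		buf = malloc(pending);
		if (buf)
			*len = read(fd, buf, pending);
		return buf;
	}
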
577 | 337 | ||
578 | static const struct file_operations inotify_fops = { | 338 | static const struct file_operations inotify_fops = { |
579 | .poll = inotify_poll, | 339 | .poll = inotify_poll, |
580 | .read = inotify_read, | 340 | .read = inotify_read, |
581 | .fasync = inotify_fasync, | 341 | .fasync = inotify_fasync, |
582 | .release = inotify_release, | 342 | .release = inotify_release, |
583 | .unlocked_ioctl = inotify_ioctl, | 343 | .unlocked_ioctl = inotify_ioctl, |
584 | .compat_ioctl = inotify_ioctl, | 344 | .compat_ioctl = inotify_ioctl, |
585 | }; | 345 | }; |
586 | 346 | ||
587 | static const struct inotify_operations inotify_user_ops = { | ||
588 | .handle_event = inotify_dev_queue_event, | ||
589 | .destroy_watch = free_inotify_user_watch, | ||
590 | }; | ||
591 | 347 | ||
348 | /* | ||
349 | * find_inode - resolve a user-given path to a specific inode | ||
350 | */ | ||
351 | static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags) | ||
352 | { | ||
353 | int error; | ||
354 | |||
355 | error = user_path_at(AT_FDCWD, dirname, flags, path); | ||
356 | if (error) | ||
357 | return error; | ||
358 | /* you can only watch an inode if you have read permissions on it */ | ||
359 | error = inode_permission(path->dentry->d_inode, MAY_READ); | ||
360 | if (error) | ||
361 | path_put(path); | ||
362 | return error; | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * When, for whatever reason, inotify is done with a mark (or what used to be a | ||
367 | * watch) we need to remove that watch from the idr and we need to send IN_IGNORED | ||
368 | * for the given wd. | ||
369 | * | ||
370 | * There is a bit of recursion here. The loop looks like: | ||
371 | * inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry -> | ||
372 | * inotify_freeing_mark -> inotify_destroy_mark_entry -> restart | ||
373 | * But the loop is broken in 2 places. fsnotify_destroy_mark_by_entry sets | ||
374 | * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup) | ||
375 | * test below will not call back to fsnotify again. But even if that test wasn't | ||
376 | * there this would still be safe since fsnotify_destroy_mark_by_entry() is | ||
377 | * safe from recursion. | ||
378 | */ | ||
379 | void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) | ||
380 | { | ||
381 | struct inotify_inode_mark_entry *ientry; | ||
382 | struct inotify_event_private_data *event_priv; | ||
383 | struct fsnotify_event_private_data *fsn_event_priv; | ||
384 | struct fsnotify_group *egroup; | ||
385 | struct idr *idr; | ||
386 | |||
387 | spin_lock(&entry->lock); | ||
388 | egroup = entry->group; | ||
389 | |||
390 | /* if egroup we aren't really done and something might still send events | ||
391 | * for this inode; on the callback we'll send the IN_IGNORED */ | ||
392 | if (egroup) { | ||
393 | spin_unlock(&entry->lock); | ||
394 | fsnotify_destroy_mark_by_entry(entry); | ||
395 | return; | ||
396 | } | ||
397 | spin_unlock(&entry->lock); | ||
398 | |||
399 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
400 | |||
401 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); | ||
402 | if (unlikely(!event_priv)) | ||
403 | goto skip_send_ignore; | ||
404 | |||
405 | fsn_event_priv = &event_priv->fsnotify_event_priv_data; | ||
406 | |||
407 | fsn_event_priv->group = group; | ||
408 | event_priv->wd = ientry->wd; | ||
409 | |||
410 | fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv); | ||
411 | |||
412 | /* did the private data get added? */ | ||
413 | if (list_empty(&fsn_event_priv->event_list)) | ||
414 | inotify_free_event_priv(fsn_event_priv); | ||
415 | |||
416 | skip_send_ignore: | ||
417 | |||
418 | /* remove this entry from the idr */ | ||
419 | spin_lock(&group->inotify_data.idr_lock); | ||
420 | idr = &group->inotify_data.idr; | ||
421 | idr_remove(idr, ientry->wd); | ||
422 | spin_unlock(&group->inotify_data.idr_lock); | ||
423 | |||
424 | /* removed from idr, drop that reference */ | ||
425 | fsnotify_put_mark(entry); | ||
426 | } | ||
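
One consequence of queueing inotify_ignored_event here: userspace always gets an IN_IGNORED record for a dying wd (whether from inotify_rm_watch() or because the inode went away), so that is the reliable point to drop per-wd state. A sketch, with a hypothetical bookkeeping helper:

	/* inside the event loop of the earlier read() sketch */
	if (ev->mask & IN_IGNORED) {
		/* the kernel has torn the watch down; this wd may be
		 * handed out again by a later inotify_add_watch() */
		drop_watch_state(ev->wd);	/* hypothetical helper */
	}
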
427 | |||
428 | /* ding dong the mark is dead */ | ||
429 | static void inotify_free_mark(struct fsnotify_mark_entry *entry) | ||
430 | { | ||
431 | struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry; | ||
432 | |||
433 | kmem_cache_free(inotify_inode_mark_cachep, ientry); | ||
434 | } | ||
435 | |||
436 | static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) | ||
437 | { | ||
438 | struct fsnotify_mark_entry *entry = NULL; | ||
439 | struct inotify_inode_mark_entry *ientry; | ||
440 | int ret = 0; | ||
441 | int add = (arg & IN_MASK_ADD); | ||
442 | __u32 mask; | ||
443 | __u32 old_mask, new_mask; | ||
444 | |||
445 | /* don't allow invalid bits: we don't want flags set */ | ||
446 | mask = inotify_arg_to_mask(arg); | ||
447 | if (unlikely(!mask)) | ||
448 | return -EINVAL; | ||
449 | |||
450 | ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); | ||
451 | if (unlikely(!ientry)) | ||
452 | return -ENOMEM; | ||
453 | /* we set the mask at the end after attaching it */ | ||
454 | fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark); | ||
455 | ientry->wd = 0; | ||
456 | |||
457 | find_entry: | ||
458 | spin_lock(&inode->i_lock); | ||
459 | entry = fsnotify_find_mark_entry(group, inode); | ||
460 | spin_unlock(&inode->i_lock); | ||
461 | if (entry) { | ||
462 | kmem_cache_free(inotify_inode_mark_cachep, ientry); | ||
463 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
464 | } else { | ||
465 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) { | ||
466 | ret = -ENOSPC; | ||
467 | goto out_err; | ||
468 | } | ||
469 | |||
470 | ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode); | ||
471 | if (ret == -EEXIST) | ||
472 | goto find_entry; | ||
473 | else if (ret) | ||
474 | goto out_err; | ||
475 | |||
476 | entry = &ientry->fsn_entry; | ||
477 | retry: | ||
478 | ret = -ENOMEM; | ||
479 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | ||
480 | goto out_err; | ||
481 | |||
482 | spin_lock(&group->inotify_data.idr_lock); | ||
483 | /* if entry is added to the idr we keep the reference obtained | ||
484 | * through fsnotify_add_mark. Remember to drop this reference | ||
485 | * when entry is removed from idr */ | ||
486 | ret = idr_get_new_above(&group->inotify_data.idr, entry, | ||
487 | ++group->inotify_data.last_wd, | ||
488 | &ientry->wd); | ||
489 | spin_unlock(&group->inotify_data.idr_lock); | ||
490 | if (ret) { | ||
491 | if (ret == -EAGAIN) | ||
492 | goto retry; | ||
493 | goto out_err; | ||
494 | } | ||
495 | atomic_inc(&group->inotify_data.user->inotify_watches); | ||
496 | } | ||
497 | |||
498 | spin_lock(&entry->lock); | ||
499 | |||
500 | old_mask = entry->mask; | ||
501 | if (add) { | ||
502 | entry->mask |= mask; | ||
503 | new_mask = entry->mask; | ||
504 | } else { | ||
505 | entry->mask = mask; | ||
506 | new_mask = entry->mask; | ||
507 | } | ||
508 | |||
509 | spin_unlock(&entry->lock); | ||
510 | |||
511 | if (old_mask != new_mask) { | ||
512 | /* more bits in old than in new? */ | ||
513 | int dropped = (old_mask & ~new_mask); | ||
514 | /* more bits in this entry than the inode's mask? */ | ||
515 | int do_inode = (new_mask & ~inode->i_fsnotify_mask); | ||
516 | /* more bits in this entry than the group? */ | ||
517 | int do_group = (new_mask & ~group->mask); | ||
518 | |||
519 | /* update the inode with this new entry */ | ||
520 | if (dropped || do_inode) | ||
521 | fsnotify_recalc_inode_mask(inode); | ||
522 | |||
523 | /* update the group mask with the new mask */ | ||
524 | if (dropped || do_group) | ||
525 | fsnotify_recalc_group_mask(group); | ||
526 | } | ||
527 | |||
528 | return ientry->wd; | ||
529 | |||
530 | out_err: | ||
531 | /* this isn't supposed to happen; just kill the watch */ | ||
532 | if (entry) { | ||
533 | fsnotify_destroy_mark_by_entry(entry); | ||
534 | fsnotify_put_mark(entry); | ||
535 | } | ||
536 | return ret; | ||
537 | } | ||
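
Worth noting from the add/replace branch above: a second watch on the same inode returns the existing wd, and IN_MASK_ADD decides whether the new bits are ORed in or substituted. Roughly, from userspace:

	int wd1 = inotify_add_watch(fd, "/tmp", IN_CREATE);
	int wd2 = inotify_add_watch(fd, "/tmp", IN_MASK_ADD | IN_DELETE);
	/* wd1 == wd2: one mark per (group, inode); its mask is now
	 * IN_CREATE | IN_DELETE. Without IN_MASK_ADD the second call
	 * would have replaced the mask, leaving only IN_DELETE. */
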
538 | |||
539 | static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) | ||
540 | { | ||
541 | struct fsnotify_group *group; | ||
542 | unsigned int grp_num; | ||
543 | |||
544 | /* fsnotify_obtain_group takes a reference to the group; we drop it when the file is released */ | ||
545 | grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num)); | ||
546 | group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops); | ||
547 | if (IS_ERR(group)) | ||
548 | return group; | ||
549 | |||
550 | group->max_events = max_events; | ||
551 | |||
552 | spin_lock_init(&group->inotify_data.idr_lock); | ||
553 | idr_init(&group->inotify_data.idr); | ||
554 | group->inotify_data.last_wd = 0; | ||
555 | group->inotify_data.user = user; | ||
556 | group->inotify_data.fa = NULL; | ||
557 | |||
558 | return group; | ||
559 | } | ||
560 | |||
561 | |||
562 | /* inotify syscalls */ | ||
592 | SYSCALL_DEFINE1(inotify_init1, int, flags) | 563 | SYSCALL_DEFINE1(inotify_init1, int, flags) |
593 | { | 564 | { |
594 | struct inotify_device *dev; | 565 | struct fsnotify_group *group; |
595 | struct inotify_handle *ih; | ||
596 | struct user_struct *user; | 566 | struct user_struct *user; |
597 | struct file *filp; | 567 | struct file *filp; |
598 | int fd, ret; | 568 | int fd, ret; |
@@ -621,45 +591,27 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) | |||
621 | goto out_free_uid; | 591 | goto out_free_uid; |
622 | } | 592 | } |
623 | 593 | ||
624 | dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); | 594 | /* fsnotify_obtain_group takes a reference to the group; we drop it when the file is released */ |
625 | if (unlikely(!dev)) { | 595 | group = inotify_new_group(user, inotify_max_queued_events); |
626 | ret = -ENOMEM; | 596 | if (IS_ERR(group)) { |
597 | ret = PTR_ERR(group); | ||
627 | goto out_free_uid; | 598 | goto out_free_uid; |
628 | } | 599 | } |
629 | 600 | ||
630 | ih = inotify_init(&inotify_user_ops); | ||
631 | if (IS_ERR(ih)) { | ||
632 | ret = PTR_ERR(ih); | ||
633 | goto out_free_dev; | ||
634 | } | ||
635 | dev->ih = ih; | ||
636 | dev->fa = NULL; | ||
637 | |||
638 | filp->f_op = &inotify_fops; | 601 | filp->f_op = &inotify_fops; |
639 | filp->f_path.mnt = mntget(inotify_mnt); | 602 | filp->f_path.mnt = mntget(inotify_mnt); |
640 | filp->f_path.dentry = dget(inotify_mnt->mnt_root); | 603 | filp->f_path.dentry = dget(inotify_mnt->mnt_root); |
641 | filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; | 604 | filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; |
642 | filp->f_mode = FMODE_READ; | 605 | filp->f_mode = FMODE_READ; |
643 | filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); | 606 | filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); |
644 | filp->private_data = dev; | 607 | filp->private_data = group; |
645 | 608 | ||
646 | INIT_LIST_HEAD(&dev->events); | ||
647 | init_waitqueue_head(&dev->wq); | ||
648 | mutex_init(&dev->ev_mutex); | ||
649 | mutex_init(&dev->up_mutex); | ||
650 | dev->event_count = 0; | ||
651 | dev->queue_size = 0; | ||
652 | dev->max_events = inotify_max_queued_events; | ||
653 | dev->user = user; | ||
654 | atomic_set(&dev->count, 0); | ||
655 | |||
656 | get_inotify_dev(dev); | ||
657 | atomic_inc(&user->inotify_devs); | 609 | atomic_inc(&user->inotify_devs); |
610 | |||
658 | fd_install(fd, filp); | 611 | fd_install(fd, filp); |
659 | 612 | ||
660 | return fd; | 613 | return fd; |
661 | out_free_dev: | 614 | |
662 | kfree(dev); | ||
663 | out_free_uid: | 615 | out_free_uid: |
664 | free_uid(user); | 616 | free_uid(user); |
665 | put_filp(filp); | 617 | put_filp(filp); |
@@ -676,8 +628,8 @@ SYSCALL_DEFINE0(inotify_init) | |||
676 | SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, | 628 | SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, |
677 | u32, mask) | 629 | u32, mask) |
678 | { | 630 | { |
631 | struct fsnotify_group *group; | ||
679 | struct inode *inode; | 632 | struct inode *inode; |
680 | struct inotify_device *dev; | ||
681 | struct path path; | 633 | struct path path; |
682 | struct file *filp; | 634 | struct file *filp; |
683 | int ret, fput_needed; | 635 | int ret, fput_needed; |
@@ -698,20 +650,20 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, | |||
698 | if (mask & IN_ONLYDIR) | 650 | if (mask & IN_ONLYDIR) |
699 | flags |= LOOKUP_DIRECTORY; | 651 | flags |= LOOKUP_DIRECTORY; |
700 | 652 | ||
701 | ret = find_inode(pathname, &path, flags); | 653 | ret = inotify_find_inode(pathname, &path, flags); |
702 | if (unlikely(ret)) | 654 | if (ret) |
703 | goto fput_and_out; | 655 | goto fput_and_out; |
704 | 656 | ||
705 | /* inode held in place by reference to path; dev by fget on fd */ | 657 | /* inode held in place by reference to path; group by fget on fd */ |
706 | inode = path.dentry->d_inode; | 658 | inode = path.dentry->d_inode; |
707 | dev = filp->private_data; | 659 | group = filp->private_data; |
708 | 660 | ||
709 | mutex_lock(&dev->up_mutex); | 661 | /* create/update an inode mark */ |
710 | ret = inotify_find_update_watch(dev->ih, inode, mask); | 662 | ret = inotify_update_watch(group, inode, mask); |
711 | if (ret == -ENOENT) | 663 | if (unlikely(ret)) |
712 | ret = create_watch(dev, inode, mask); | 664 | goto path_put_and_out; |
713 | mutex_unlock(&dev->up_mutex); | ||
714 | 665 | ||
666 | path_put_and_out: | ||
715 | path_put(&path); | 667 | path_put(&path); |
716 | fput_and_out: | 668 | fput_and_out: |
717 | fput_light(filp, fput_needed); | 669 | fput_light(filp, fput_needed); |
@@ -720,9 +672,10 @@ fput_and_out: | |||
720 | 672 | ||
721 | SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) | 673 | SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) |
722 | { | 674 | { |
675 | struct fsnotify_group *group; | ||
676 | struct fsnotify_mark_entry *entry; | ||
723 | struct file *filp; | 677 | struct file *filp; |
724 | struct inotify_device *dev; | 678 | int ret = 0, fput_needed; |
725 | int ret, fput_needed; | ||
726 | 679 | ||
727 | filp = fget_light(fd, &fput_needed); | 680 | filp = fget_light(fd, &fput_needed); |
728 | if (unlikely(!filp)) | 681 | if (unlikely(!filp)) |
@@ -734,10 +687,20 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) | |||
734 | goto out; | 687 | goto out; |
735 | } | 688 | } |
736 | 689 | ||
737 | dev = filp->private_data; | 690 | group = filp->private_data; |
738 | 691 | ||
739 | /* we free our watch data when we get IN_IGNORED */ | 692 | spin_lock(&group->inotify_data.idr_lock); |
740 | ret = inotify_rm_wd(dev->ih, wd); | 693 | entry = idr_find(&group->inotify_data.idr, wd); |
694 | if (unlikely(!entry)) { | ||
695 | spin_unlock(&group->inotify_data.idr_lock); | ||
696 | ret = -EINVAL; | ||
697 | goto out; | ||
698 | } | ||
699 | fsnotify_get_mark(entry); | ||
700 | spin_unlock(&group->inotify_data.idr_lock); | ||
701 | |||
702 | inotify_destroy_mark_entry(entry, group); | ||
703 | fsnotify_put_mark(entry); | ||
741 | 704 | ||
742 | out: | 705 | out: |
743 | fput_light(filp, fput_needed); | 706 | fput_light(filp, fput_needed); |
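
With the watch table now living in the group's idr, removing a wd twice (or passing a stale one) fails at the idr_find() above. As seen from userspace, assuming <errno.h> and <stdio.h>:

	if (inotify_rm_watch(fd, wd) < 0 && errno == EINVAL)
		fprintf(stderr, "wd %d unknown or already removed\n", wd);
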
@@ -753,9 +716,9 @@ inotify_get_sb(struct file_system_type *fs_type, int flags, | |||
753 | } | 716 | } |
754 | 717 | ||
755 | static struct file_system_type inotify_fs_type = { | 718 | static struct file_system_type inotify_fs_type = { |
756 | .name = "inotifyfs", | 719 | .name = "inotifyfs", |
757 | .get_sb = inotify_get_sb, | 720 | .get_sb = inotify_get_sb, |
758 | .kill_sb = kill_anon_super, | 721 | .kill_sb = kill_anon_super, |
759 | }; | 722 | }; |
760 | 723 | ||
761 | /* | 724 | /* |
@@ -775,18 +738,16 @@ static int __init inotify_user_setup(void) | |||
775 | if (IS_ERR(inotify_mnt)) | 738 | if (IS_ERR(inotify_mnt)) |
776 | panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); | 739 | panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); |
777 | 740 | ||
741 | inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); | ||
742 | event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); | ||
743 | inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0); | ||
744 | if (!inotify_ignored_event) | ||
745 | panic("unable to allocate the inotify ignored event\n"); | ||
746 | |||
778 | inotify_max_queued_events = 16384; | 747 | inotify_max_queued_events = 16384; |
779 | inotify_max_user_instances = 128; | 748 | inotify_max_user_instances = 128; |
780 | inotify_max_user_watches = 8192; | 749 | inotify_max_user_watches = 8192; |
781 | 750 | ||
782 | watch_cachep = kmem_cache_create("inotify_watch_cache", | ||
783 | sizeof(struct inotify_user_watch), | ||
784 | 0, SLAB_PANIC, NULL); | ||
785 | event_cachep = kmem_cache_create("inotify_event_cache", | ||
786 | sizeof(struct inotify_kernel_event), | ||
787 | 0, SLAB_PANIC, NULL); | ||
788 | |||
789 | return 0; | 751 | return 0; |
790 | } | 752 | } |
791 | |||
792 | module_init(inotify_user_setup); | 753 | module_init(inotify_user_setup); |
diff --git a/fs/notify/notification.c b/fs/notify/notification.c new file mode 100644 index 000000000000..959b73e756fd --- /dev/null +++ b/fs/notify/notification.c | |||
@@ -0,0 +1,411 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; see the file COPYING. If not, write to | ||
16 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
17 | */ | ||
18 | |||
19 | /* | ||
20 | * Basic idea behind the notification queue: An fsnotify group (like inotify) | ||
21 | * sends the userspace notification about events asynchronously some time after | ||
22 | * the event happened. When inotify gets an event it will need to add that | ||
23 | * event to the group notify queue. Since a single event might need to be on | ||
24 | * multiple groups' notification queues we can't add the event directly to each | ||
25 | * queue and instead add a small "event_holder" to each queue. This event_holder | ||
26 | * has a pointer back to the original event. Since the majority of events are | ||
27 | * going to end up on one, and only one, notification queue we embed one | ||
28 | * event_holder into each event. This means we have a single allocation instead | ||
29 | * of always needing two. If the embedded event_holder is already in use by | ||
30 | * another group a new event_holder (from fsnotify_event_holder_cachep) will be | ||
31 | * allocated and used. | ||
32 | */ | ||
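
A hedged sketch of the two structures this comment describes, reconstructed from how they are used in this file; the real definitions live in the fsnotify headers and may carry more fields:

	struct fsnotify_event_holder {
		struct fsnotify_event *event;	/* the event being queued */
		struct list_head event_list;	/* link on one group's notification_list */
	};

	struct fsnotify_event {
		struct fsnotify_event_holder holder;	/* embedded holder for the
							 * common, single-queue case */
		spinlock_t lock;		/* protects the embedded holder and
						 * private_data_list */
		atomic_t refcnt;
		/* ... mask, to_tell, path/inode, file_name, name_len,
		 * private_data_list, sync_cookie ... */
	};
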
33 | |||
34 | #include <linux/fs.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/kernel.h> | ||
37 | #include <linux/list.h> | ||
38 | #include <linux/module.h> | ||
39 | #include <linux/mount.h> | ||
40 | #include <linux/mutex.h> | ||
41 | #include <linux/namei.h> | ||
42 | #include <linux/path.h> | ||
43 | #include <linux/slab.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | |||
46 | #include <asm/atomic.h> | ||
47 | |||
48 | #include <linux/fsnotify_backend.h> | ||
49 | #include "fsnotify.h" | ||
50 | |||
51 | static struct kmem_cache *fsnotify_event_cachep; | ||
52 | static struct kmem_cache *fsnotify_event_holder_cachep; | ||
53 | /* | ||
54 | * This is a magic event we send when the q is too full. Since it doesn't | ||
55 | * hold real event information we just keep one system wide and use it any time | ||
56 | * it is needed. Its refcnt is set to 1 at kernel init time and will never | ||
57 | * get set to 0 so it will never get 'freed' | ||
58 | */ | ||
59 | static struct fsnotify_event q_overflow_event; | ||
60 | static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); | ||
61 | |||
62 | /** | ||
63 | * fsnotify_get_cookie - return a unique cookie for use in synchronizing events. | ||
64 | * Called from fsnotify_move, which is inlined into filesystem modules. | ||
65 | */ | ||
66 | u32 fsnotify_get_cookie(void) | ||
67 | { | ||
68 | return atomic_inc_return(&fsnotify_sync_cookie); | ||
69 | } | ||
70 | EXPORT_SYMBOL_GPL(fsnotify_get_cookie); | ||
71 | |||
72 | /* return true if the notify queue is empty, false otherwise */ | ||
73 | bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) | ||
74 | { | ||
75 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); | ||
76 | return list_empty(&group->notification_list) ? true : false; | ||
77 | } | ||
78 | |||
79 | void fsnotify_get_event(struct fsnotify_event *event) | ||
80 | { | ||
81 | atomic_inc(&event->refcnt); | ||
82 | } | ||
83 | |||
84 | void fsnotify_put_event(struct fsnotify_event *event) | ||
85 | { | ||
86 | if (!event) | ||
87 | return; | ||
88 | |||
89 | if (atomic_dec_and_test(&event->refcnt)) { | ||
90 | if (event->data_type == FSNOTIFY_EVENT_PATH) | ||
91 | path_put(&event->path); | ||
92 | |||
93 | BUG_ON(!list_empty(&event->private_data_list)); | ||
94 | |||
95 | kfree(event->file_name); | ||
96 | kmem_cache_free(fsnotify_event_cachep, event); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | struct fsnotify_event_holder *fsnotify_alloc_event_holder(void) | ||
101 | { | ||
102 | return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL); | ||
103 | } | ||
104 | |||
105 | void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder) | ||
106 | { | ||
107 | kmem_cache_free(fsnotify_event_holder_cachep, holder); | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * Find the private data that the group previously attached to this event when | ||
112 | * the group added the event to the notification queue (fsnotify_add_notify_event) | ||
113 | */ | ||
114 | struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event) | ||
115 | { | ||
116 | struct fsnotify_event_private_data *lpriv; | ||
117 | struct fsnotify_event_private_data *priv = NULL; | ||
118 | |||
119 | assert_spin_locked(&event->lock); | ||
120 | |||
121 | list_for_each_entry(lpriv, &event->private_data_list, event_list) { | ||
122 | if (lpriv->group == group) { | ||
123 | priv = lpriv; | ||
124 | list_del(&priv->event_list); | ||
125 | break; | ||
126 | } | ||
127 | } | ||
128 | return priv; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Check if 2 events contain the same information. We do not compare private data | ||
133 | * but at this moment that isn't a problem for any known fsnotify listeners. | ||
134 | */ | ||
135 | static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new) | ||
136 | { | ||
137 | if ((old->mask == new->mask) && | ||
138 | (old->to_tell == new->to_tell) && | ||
139 | (old->data_type == new->data_type)) { | ||
140 | switch (old->data_type) { | ||
141 | case (FSNOTIFY_EVENT_INODE): | ||
142 | if (old->inode == new->inode) | ||
143 | return true; | ||
144 | break; | ||
145 | case (FSNOTIFY_EVENT_PATH): | ||
146 | if ((old->path.mnt == new->path.mnt) && | ||
147 | (old->path.dentry == new->path.dentry)) | ||
148 | return true; | ||
149 | case (FSNOTIFY_EVENT_NONE): | ||
150 | return true; | ||
151 | }; | ||
152 | } | ||
153 | return false; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * Add an event to the group notification queue. The group can later pull this | ||
158 | * event off the queue to deal with. If the event is successfully added to the | ||
159 | * group's notification queue, a reference is taken on event. | ||
160 | */ | ||
161 | int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, | ||
162 | struct fsnotify_event_private_data *priv) | ||
163 | { | ||
164 | struct fsnotify_event_holder *holder = NULL; | ||
165 | struct list_head *list = &group->notification_list; | ||
166 | struct fsnotify_event_holder *last_holder; | ||
167 | struct fsnotify_event *last_event; | ||
168 | |||
169 | /* easy to tell if priv was attached to the event */ | ||
170 | INIT_LIST_HEAD(&priv->event_list); | ||
171 | |||
172 | /* | ||
173 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. | ||
174 | * Check if we expect to be able to use that holder. If not alloc a new | ||
175 | * holder. | ||
176 | * For the overflow event it's possible that something will use the in- | ||
177 | * event holder before we get the lock, so we may need to jump back and | ||
178 | * alloc a new holder; this can't happen for most events... | ||
179 | */ | ||
180 | if (!list_empty(&event->holder.event_list)) { | ||
181 | alloc_holder: | ||
182 | holder = fsnotify_alloc_event_holder(); | ||
183 | if (!holder) | ||
184 | return -ENOMEM; | ||
185 | } | ||
186 | |||
187 | mutex_lock(&group->notification_mutex); | ||
188 | |||
189 | if (group->q_len >= group->max_events) { | ||
190 | event = &q_overflow_event; | ||
191 | /* sorry, no private data on the overflow event */ | ||
192 | priv = NULL; | ||
193 | } | ||
194 | |||
195 | spin_lock(&event->lock); | ||
196 | |||
197 | if (list_empty(&event->holder.event_list)) { | ||
198 | if (unlikely(holder)) | ||
199 | fsnotify_destroy_event_holder(holder); | ||
200 | holder = &event->holder; | ||
201 | } else if (unlikely(!holder)) { | ||
202 | /* between the time we checked above and got the lock the in- | ||
203 | * event holder was used; go back and get a new one */ | ||
204 | spin_unlock(&event->lock); | ||
205 | mutex_unlock(&group->notification_mutex); | ||
206 | goto alloc_holder; | ||
207 | } | ||
208 | |||
209 | if (!list_empty(list)) { | ||
210 | last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list); | ||
211 | last_event = last_holder->event; | ||
212 | if (event_compare(last_event, event)) { | ||
213 | spin_unlock(&event->lock); | ||
214 | mutex_unlock(&group->notification_mutex); | ||
215 | if (holder != &event->holder) | ||
216 | fsnotify_destroy_event_holder(holder); | ||
217 | return -EEXIST; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | group->q_len++; | ||
222 | holder->event = event; | ||
223 | |||
224 | fsnotify_get_event(event); | ||
225 | list_add_tail(&holder->event_list, list); | ||
226 | if (priv) | ||
227 | list_add_tail(&priv->event_list, &event->private_data_list); | ||
228 | spin_unlock(&event->lock); | ||
229 | mutex_unlock(&group->notification_mutex); | ||
230 | |||
231 | wake_up(&group->notification_waitq); | ||
232 | return 0; | ||
233 | } | ||
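
Note the contract this return path creates: on queue overflow or an -EEXIST merge the caller's priv is never attached, which is exactly why inotify_destroy_mark_entry() earlier checks list_empty() on the priv's event_list before freeing it. A condensed caller sketch (free_priv() stands in for the group's real destructor):

	fsn_priv->group = group;
	fsnotify_add_notify_event(group, event, fsn_priv);

	/* priv reached event->private_data_list only on a clean add */
	if (list_empty(&fsn_priv->event_list))
		free_priv(fsn_priv);	/* hypothetical destructor */
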
234 | |||
235 | /* | ||
236 | * Remove and return the first event from the notification list. There is a | ||
237 | * reference held on this event since it was on the list. It is the responsibility | ||
238 | * of the caller to drop this reference. | ||
239 | */ | ||
240 | struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) | ||
241 | { | ||
242 | struct fsnotify_event *event; | ||
243 | struct fsnotify_event_holder *holder; | ||
244 | |||
245 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); | ||
246 | |||
247 | holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); | ||
248 | |||
249 | event = holder->event; | ||
250 | |||
251 | spin_lock(&event->lock); | ||
252 | holder->event = NULL; | ||
253 | list_del_init(&holder->event_list); | ||
254 | spin_unlock(&event->lock); | ||
255 | |||
256 | /* holder == &event->holder means we are referenced through the in-event holder */ | ||
257 | if (holder != &event->holder) | ||
258 | fsnotify_destroy_event_holder(holder); | ||
259 | |||
260 | group->q_len--; | ||
261 | |||
262 | return event; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * This will not remove the event; that must be done with fsnotify_remove_notify_event() | ||
267 | */ | ||
268 | struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) | ||
269 | { | ||
270 | struct fsnotify_event *event; | ||
271 | struct fsnotify_event_holder *holder; | ||
272 | |||
273 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); | ||
274 | |||
275 | holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); | ||
276 | event = holder->event; | ||
277 | |||
278 | return event; | ||
279 | } | ||
280 | |||
281 | /* | ||
282 | * Called when a group is being torn down to clean up any outstanding | ||
283 | * event notifications. | ||
284 | */ | ||
285 | void fsnotify_flush_notify(struct fsnotify_group *group) | ||
286 | { | ||
287 | struct fsnotify_event *event; | ||
288 | struct fsnotify_event_private_data *priv; | ||
289 | |||
290 | mutex_lock(&group->notification_mutex); | ||
291 | while (!fsnotify_notify_queue_is_empty(group)) { | ||
292 | event = fsnotify_remove_notify_event(group); | ||
293 | /* if they don't implement free_event_priv they'd better not have attached any */ | ||
294 | if (group->ops->free_event_priv) { | ||
295 | spin_lock(&event->lock); | ||
296 | priv = fsnotify_remove_priv_from_event(group, event); | ||
297 | spin_unlock(&event->lock); | ||
298 | if (priv) | ||
299 | group->ops->free_event_priv(priv); | ||
300 | } | ||
301 | fsnotify_put_event(event); /* matches fsnotify_add_notify_event */ | ||
302 | } | ||
303 | mutex_unlock(&group->notification_mutex); | ||
304 | } | ||
305 | |||
306 | static void initialize_event(struct fsnotify_event *event) | ||
307 | { | ||
308 | event->holder.event = NULL; | ||
309 | INIT_LIST_HEAD(&event->holder.event_list); | ||
310 | atomic_set(&event->refcnt, 1); | ||
311 | |||
312 | spin_lock_init(&event->lock); | ||
313 | |||
314 | event->path.dentry = NULL; | ||
315 | event->path.mnt = NULL; | ||
316 | event->inode = NULL; | ||
317 | event->data_type = FSNOTIFY_EVENT_NONE; | ||
318 | |||
319 | INIT_LIST_HEAD(&event->private_data_list); | ||
320 | |||
321 | event->to_tell = NULL; | ||
322 | |||
323 | event->file_name = NULL; | ||
324 | event->name_len = 0; | ||
325 | |||
326 | event->sync_cookie = 0; | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * fsnotify_create_event - Allocate a new event which will be sent to each | ||
331 | * group's handle_event function if the group was interested in this | ||
332 | * particular event. | ||
333 | * | ||
334 | * @to_tell the inode which is supposed to receive the event (sometimes a | ||
335 | * parent of the inode to which the event happened). | ||
336 | * @mask what actually happened. | ||
337 | * @data pointer to the object which was actually affected | ||
338 | * @data_type flag indicating whether the data is a file, path, inode, nothing... | ||
339 | * @name the filename, if available | ||
340 | */ | ||
341 | struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, | ||
342 | int data_type, const char *name, u32 cookie) | ||
343 | { | ||
344 | struct fsnotify_event *event; | ||
345 | |||
346 | event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); | ||
347 | if (!event) | ||
348 | return NULL; | ||
349 | |||
350 | initialize_event(event); | ||
351 | |||
352 | if (name) { | ||
353 | event->file_name = kstrdup(name, GFP_KERNEL); | ||
354 | if (!event->file_name) { | ||
355 | kmem_cache_free(fsnotify_event_cachep, event); | ||
356 | return NULL; | ||
357 | } | ||
358 | event->name_len = strlen(event->file_name); | ||
359 | } | ||
360 | |||
361 | event->sync_cookie = cookie; | ||
362 | event->to_tell = to_tell; | ||
363 | |||
364 | switch (data_type) { | ||
365 | case FSNOTIFY_EVENT_FILE: { | ||
366 | struct file *file = data; | ||
367 | struct path *path = &file->f_path; | ||
368 | event->path.dentry = path->dentry; | ||
369 | event->path.mnt = path->mnt; | ||
370 | path_get(&event->path); | ||
371 | event->data_type = FSNOTIFY_EVENT_PATH; | ||
372 | break; | ||
373 | } | ||
374 | case FSNOTIFY_EVENT_PATH: { | ||
375 | struct path *path = data; | ||
376 | event->path.dentry = path->dentry; | ||
377 | event->path.mnt = path->mnt; | ||
378 | path_get(&event->path); | ||
379 | event->data_type = FSNOTIFY_EVENT_PATH; | ||
380 | break; | ||
381 | } | ||
382 | case FSNOTIFY_EVENT_INODE: | ||
383 | event->inode = data; | ||
384 | event->data_type = FSNOTIFY_EVENT_INODE; | ||
385 | break; | ||
386 | case FSNOTIFY_EVENT_NONE: | ||
387 | event->inode = NULL; | ||
388 | event->path.dentry = NULL; | ||
389 | event->path.mnt = NULL; | ||
390 | break; | ||
391 | default: | ||
392 | BUG(); | ||
393 | } | ||
394 | |||
395 | event->mask = mask; | ||
396 | |||
397 | return event; | ||
398 | } | ||
399 | |||
400 | __init int fsnotify_notification_init(void) | ||
401 | { | ||
402 | fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); | ||
403 | fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); | ||
404 | |||
405 | initialize_event(&q_overflow_event); | ||
406 | q_overflow_event.mask = FS_Q_OVERFLOW; | ||
407 | |||
408 | return 0; | ||
409 | } | ||
410 | subsys_initcall(fsnotify_notification_init); | ||
411 | |||
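
To the inotify consumer, q_overflow_event surfaces as a single IN_Q_OVERFLOW record once a group's queue reaches max_events; it carries no name or wd payload, so the only sane reaction is to resynchronize. A sketch (the rescan helper is hypothetical):

	if (ev->mask & IN_Q_OVERFLOW) {
		/* events were dropped after the queue filled; cached
		 * per-file state can no longer be trusted */
		rescan_watched_tree();	/* hypothetical resync helper */
	}
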
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index f76951dcd4a6..abaaa1cbf8de 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/string.h> | 26 | #include <linux/string.h> |
27 | #include <linux/spinlock.h> | 27 | #include <linux/spinlock.h> |
28 | #include <linux/blkdev.h> /* For bdev_hardsect_size(). */ | 28 | #include <linux/blkdev.h> /* For bdev_logical_block_size(). */ |
29 | #include <linux/backing-dev.h> | 29 | #include <linux/backing-dev.h> |
30 | #include <linux/buffer_head.h> | 30 | #include <linux/buffer_head.h> |
31 | #include <linux/vfs.h> | 31 | #include <linux/vfs.h> |
@@ -443,6 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
443 | ntfs_volume *vol = NTFS_SB(sb); | 443 | ntfs_volume *vol = NTFS_SB(sb); |
444 | 444 | ||
445 | ntfs_debug("Entering with remount options string: %s", opt); | 445 | ntfs_debug("Entering with remount options string: %s", opt); |
446 | |||
447 | lock_kernel(); | ||
446 | #ifndef NTFS_RW | 448 | #ifndef NTFS_RW |
447 | /* For read-only compiled driver, enforce read-only flag. */ | 449 | /* For read-only compiled driver, enforce read-only flag. */ |
448 | *flags |= MS_RDONLY; | 450 | *flags |= MS_RDONLY; |
@@ -466,15 +468,18 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
466 | if (NVolErrors(vol)) { | 468 | if (NVolErrors(vol)) { |
467 | ntfs_error(sb, "Volume has errors and is read-only%s", | 469 | ntfs_error(sb, "Volume has errors and is read-only%s", |
468 | es); | 470 | es); |
471 | unlock_kernel(); | ||
469 | return -EROFS; | 472 | return -EROFS; |
470 | } | 473 | } |
471 | if (vol->vol_flags & VOLUME_IS_DIRTY) { | 474 | if (vol->vol_flags & VOLUME_IS_DIRTY) { |
472 | ntfs_error(sb, "Volume is dirty and read-only%s", es); | 475 | ntfs_error(sb, "Volume is dirty and read-only%s", es); |
476 | unlock_kernel(); | ||
473 | return -EROFS; | 477 | return -EROFS; |
474 | } | 478 | } |
475 | if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) { | 479 | if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) { |
476 | ntfs_error(sb, "Volume has been modified by chkdsk " | 480 | ntfs_error(sb, "Volume has been modified by chkdsk " |
477 | "and is read-only%s", es); | 481 | "and is read-only%s", es); |
482 | unlock_kernel(); | ||
478 | return -EROFS; | 483 | return -EROFS; |
479 | } | 484 | } |
480 | if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { | 485 | if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { |
@@ -482,11 +487,13 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
482 | "(0x%x) and is read-only%s", | 487 | "(0x%x) and is read-only%s", |
483 | (unsigned)le16_to_cpu(vol->vol_flags), | 488 | (unsigned)le16_to_cpu(vol->vol_flags), |
484 | es); | 489 | es); |
490 | unlock_kernel(); | ||
485 | return -EROFS; | 491 | return -EROFS; |
486 | } | 492 | } |
487 | if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { | 493 | if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { |
488 | ntfs_error(sb, "Failed to set dirty bit in volume " | 494 | ntfs_error(sb, "Failed to set dirty bit in volume " |
489 | "information flags%s", es); | 495 | "information flags%s", es); |
496 | unlock_kernel(); | ||
490 | return -EROFS; | 497 | return -EROFS; |
491 | } | 498 | } |
492 | #if 0 | 499 | #if 0 |
@@ -506,18 +513,21 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
506 | ntfs_error(sb, "Failed to empty journal $LogFile%s", | 513 | ntfs_error(sb, "Failed to empty journal $LogFile%s", |
507 | es); | 514 | es); |
508 | NVolSetErrors(vol); | 515 | NVolSetErrors(vol); |
516 | unlock_kernel(); | ||
509 | return -EROFS; | 517 | return -EROFS; |
510 | } | 518 | } |
511 | if (!ntfs_mark_quotas_out_of_date(vol)) { | 519 | if (!ntfs_mark_quotas_out_of_date(vol)) { |
512 | ntfs_error(sb, "Failed to mark quotas out of date%s", | 520 | ntfs_error(sb, "Failed to mark quotas out of date%s", |
513 | es); | 521 | es); |
514 | NVolSetErrors(vol); | 522 | NVolSetErrors(vol); |
523 | unlock_kernel(); | ||
515 | return -EROFS; | 524 | return -EROFS; |
516 | } | 525 | } |
517 | if (!ntfs_stamp_usnjrnl(vol)) { | 526 | if (!ntfs_stamp_usnjrnl(vol)) { |
518 | ntfs_error(sb, "Failed to stamp transation log " | 527 | ntfs_error(sb, "Failed to stamp transation log " |
519 | "($UsnJrnl)%s", es); | 528 | "($UsnJrnl)%s", es); |
520 | NVolSetErrors(vol); | 529 | NVolSetErrors(vol); |
530 | unlock_kernel(); | ||
521 | return -EROFS; | 531 | return -EROFS; |
522 | } | 532 | } |
523 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { | 533 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { |
@@ -533,8 +543,11 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
533 | 543 | ||
534 | // TODO: Deal with *flags. | 544 | // TODO: Deal with *flags. |
535 | 545 | ||
536 | if (!parse_options(vol, opt)) | 546 | if (!parse_options(vol, opt)) { |
547 | unlock_kernel(); | ||
537 | return -EINVAL; | 548 | return -EINVAL; |
549 | } | ||
550 | unlock_kernel(); | ||
538 | ntfs_debug("Done."); | 551 | ntfs_debug("Done."); |
539 | return 0; | 552 | return 0; |
540 | } | 553 | } |
@@ -2246,6 +2259,9 @@ static void ntfs_put_super(struct super_block *sb) | |||
2246 | ntfs_volume *vol = NTFS_SB(sb); | 2259 | ntfs_volume *vol = NTFS_SB(sb); |
2247 | 2260 | ||
2248 | ntfs_debug("Entering."); | 2261 | ntfs_debug("Entering."); |
2262 | |||
2263 | lock_kernel(); | ||
2264 | |||
2249 | #ifdef NTFS_RW | 2265 | #ifdef NTFS_RW |
2250 | /* | 2266 | /* |
2251 | * Commit all inodes while they are still open in case some of them | 2267 | * Commit all inodes while they are still open in case some of them |
@@ -2373,39 +2389,12 @@ static void ntfs_put_super(struct super_block *sb) | |||
2373 | vol->mftmirr_ino = NULL; | 2389 | vol->mftmirr_ino = NULL; |
2374 | } | 2390 | } |
2375 | /* | 2391 | /* |
2376 | * If any dirty inodes are left, throw away all mft data page cache | 2392 | * We should have no dirty inodes left, due to |
2377 | * pages to allow a clean umount. This should never happen any more | 2393 | * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as |
2378 | * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as | 2394 | * the underlying mft records are written out and cleaned. |
2379 | * the underlying mft records are written out and cleaned. If it does, | ||
2380 | * happen anyway, we want to know... | ||
2381 | */ | 2395 | */ |
2382 | ntfs_commit_inode(vol->mft_ino); | 2396 | ntfs_commit_inode(vol->mft_ino); |
2383 | write_inode_now(vol->mft_ino, 1); | 2397 | write_inode_now(vol->mft_ino, 1); |
2384 | if (sb_has_dirty_inodes(sb)) { | ||
2385 | const char *s1, *s2; | ||
2386 | |||
2387 | mutex_lock(&vol->mft_ino->i_mutex); | ||
2388 | truncate_inode_pages(vol->mft_ino->i_mapping, 0); | ||
2389 | mutex_unlock(&vol->mft_ino->i_mutex); | ||
2390 | write_inode_now(vol->mft_ino, 1); | ||
2391 | if (sb_has_dirty_inodes(sb)) { | ||
2392 | static const char *_s1 = "inodes"; | ||
2393 | static const char *_s2 = ""; | ||
2394 | s1 = _s1; | ||
2395 | s2 = _s2; | ||
2396 | } else { | ||
2397 | static const char *_s1 = "mft pages"; | ||
2398 | static const char *_s2 = "They have been thrown " | ||
2399 | "away. "; | ||
2400 | s1 = _s1; | ||
2401 | s2 = _s2; | ||
2402 | } | ||
2403 | ntfs_error(sb, "Dirty %s found at umount time. %sYou should " | ||
2404 | "run chkdsk. Please email " | ||
2405 | "linux-ntfs-dev@lists.sourceforge.net and say " | ||
2406 | "that you saw this message. Thank you.", s1, | ||
2407 | s2); | ||
2408 | } | ||
2409 | #endif /* NTFS_RW */ | 2398 | #endif /* NTFS_RW */ |
2410 | 2399 | ||
2411 | iput(vol->mft_ino); | 2400 | iput(vol->mft_ino); |
@@ -2444,7 +2433,8 @@ static void ntfs_put_super(struct super_block *sb) | |||
2444 | } | 2433 | } |
2445 | sb->s_fs_info = NULL; | 2434 | sb->s_fs_info = NULL; |
2446 | kfree(vol); | 2435 | kfree(vol); |
2447 | return; | 2436 | |
2437 | unlock_kernel(); | ||
2448 | } | 2438 | } |
2449 | 2439 | ||
2450 | /** | 2440 | /** |
@@ -2785,13 +2775,13 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) | |||
2785 | goto err_out_now; | 2775 | goto err_out_now; |
2786 | 2776 | ||
2787 | /* We support sector sizes up to the PAGE_CACHE_SIZE. */ | 2777 | /* We support sector sizes up to the PAGE_CACHE_SIZE. */ |
2788 | if (bdev_hardsect_size(sb->s_bdev) > PAGE_CACHE_SIZE) { | 2778 | if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) { |
2789 | if (!silent) | 2779 | if (!silent) |
2790 | ntfs_error(sb, "Device has unsupported sector size " | 2780 | ntfs_error(sb, "Device has unsupported sector size " |
2791 | "(%i). The maximum supported sector " | 2781 | "(%i). The maximum supported sector " |
2792 | "size on this architecture is %lu " | 2782 | "size on this architecture is %lu " |
2793 | "bytes.", | 2783 | "bytes.", |
2794 | bdev_hardsect_size(sb->s_bdev), | 2784 | bdev_logical_block_size(sb->s_bdev), |
2795 | PAGE_CACHE_SIZE); | 2785 | PAGE_CACHE_SIZE); |
2796 | goto err_out_now; | 2786 | goto err_out_now; |
2797 | } | 2787 | } |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 4f85eceab376..09cc25d04611 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1371,7 +1371,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1371 | 1371 | ||
1372 | bdevname(reg->hr_bdev, reg->hr_dev_name); | 1372 | bdevname(reg->hr_bdev, reg->hr_dev_name); |
1373 | 1373 | ||
1374 | sectsize = bdev_hardsect_size(reg->hr_bdev); | 1374 | sectsize = bdev_logical_block_size(reg->hr_bdev); |
1375 | if (sectsize != reg->hr_block_bytes) { | 1375 | if (sectsize != reg->hr_block_bytes) { |
1376 | mlog(ML_ERROR, | 1376 | mlog(ML_ERROR, |
1377 | "blocksize %u incorrect for device, expected %d", | 1377 | "blocksize %u incorrect for device, expected %d", |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 79ff8d9d37e0..201b40a441fe 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/mount.h> | 42 | #include <linux/mount.h> |
43 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
44 | #include <linux/quotaops.h> | 44 | #include <linux/quotaops.h> |
45 | #include <linux/smp_lock.h> | ||
45 | 46 | ||
46 | #define MLOG_MASK_PREFIX ML_SUPER | 47 | #define MLOG_MASK_PREFIX ML_SUPER |
47 | #include <cluster/masklog.h> | 48 | #include <cluster/masklog.h> |
@@ -126,7 +127,6 @@ static int ocfs2_get_sector(struct super_block *sb, | |||
126 | struct buffer_head **bh, | 127 | struct buffer_head **bh, |
127 | int block, | 128 | int block, |
128 | int sect_size); | 129 | int sect_size); |
129 | static void ocfs2_write_super(struct super_block *sb); | ||
130 | static struct inode *ocfs2_alloc_inode(struct super_block *sb); | 130 | static struct inode *ocfs2_alloc_inode(struct super_block *sb); |
131 | static void ocfs2_destroy_inode(struct inode *inode); | 131 | static void ocfs2_destroy_inode(struct inode *inode); |
132 | static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); | 132 | static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); |
@@ -141,7 +141,6 @@ static const struct super_operations ocfs2_sops = { | |||
141 | .clear_inode = ocfs2_clear_inode, | 141 | .clear_inode = ocfs2_clear_inode, |
142 | .delete_inode = ocfs2_delete_inode, | 142 | .delete_inode = ocfs2_delete_inode, |
143 | .sync_fs = ocfs2_sync_fs, | 143 | .sync_fs = ocfs2_sync_fs, |
144 | .write_super = ocfs2_write_super, | ||
145 | .put_super = ocfs2_put_super, | 144 | .put_super = ocfs2_put_super, |
146 | .remount_fs = ocfs2_remount, | 145 | .remount_fs = ocfs2_remount, |
147 | .show_options = ocfs2_show_options, | 146 | .show_options = ocfs2_show_options, |
@@ -365,24 +364,12 @@ static struct file_operations ocfs2_osb_debug_fops = { | |||
365 | .llseek = generic_file_llseek, | 364 | .llseek = generic_file_llseek, |
366 | }; | 365 | }; |
367 | 366 | ||
368 | /* | ||
369 | * write_super and sync_fs ripped right out of ext3. | ||
370 | */ | ||
371 | static void ocfs2_write_super(struct super_block *sb) | ||
372 | { | ||
373 | if (mutex_trylock(&sb->s_lock) != 0) | ||
374 | BUG(); | ||
375 | sb->s_dirt = 0; | ||
376 | } | ||
377 | |||
378 | static int ocfs2_sync_fs(struct super_block *sb, int wait) | 367 | static int ocfs2_sync_fs(struct super_block *sb, int wait) |
379 | { | 368 | { |
380 | int status; | 369 | int status; |
381 | tid_t target; | 370 | tid_t target; |
382 | struct ocfs2_super *osb = OCFS2_SB(sb); | 371 | struct ocfs2_super *osb = OCFS2_SB(sb); |
383 | 372 | ||
384 | sb->s_dirt = 0; | ||
385 | |||
386 | if (ocfs2_is_hard_readonly(osb)) | 373 | if (ocfs2_is_hard_readonly(osb)) |
387 | return -EROFS; | 374 | return -EROFS; |
388 | 375 | ||
@@ -595,6 +582,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
595 | struct mount_options parsed_options; | 582 | struct mount_options parsed_options; |
596 | struct ocfs2_super *osb = OCFS2_SB(sb); | 583 | struct ocfs2_super *osb = OCFS2_SB(sb); |
597 | 584 | ||
585 | lock_kernel(); | ||
586 | |||
598 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { | 587 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { |
599 | ret = -EINVAL; | 588 | ret = -EINVAL; |
600 | goto out; | 589 | goto out; |
@@ -698,6 +687,7 @@ unlock_osb: | |||
698 | ocfs2_set_journal_params(osb); | 687 | ocfs2_set_journal_params(osb); |
699 | } | 688 | } |
700 | out: | 689 | out: |
690 | unlock_kernel(); | ||
701 | return ret; | 691 | return ret; |
702 | } | 692 | } |
703 | 693 | ||
@@ -713,7 +703,7 @@ static int ocfs2_sb_probe(struct super_block *sb, | |||
713 | *bh = NULL; | 703 | *bh = NULL; |
714 | 704 | ||
715 | /* may be > 512 */ | 705 | /* may be > 512 */ |
716 | *sector_size = bdev_hardsect_size(sb->s_bdev); | 706 | *sector_size = bdev_logical_block_size(sb->s_bdev); |
717 | if (*sector_size > OCFS2_MAX_BLOCKSIZE) { | 707 | if (*sector_size > OCFS2_MAX_BLOCKSIZE) { |
718 | mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n", | 708 | mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n", |
719 | *sector_size, OCFS2_MAX_BLOCKSIZE); | 709 | *sector_size, OCFS2_MAX_BLOCKSIZE); |
@@ -1550,9 +1540,13 @@ static void ocfs2_put_super(struct super_block *sb) | |||
1550 | { | 1540 | { |
1551 | mlog_entry("(0x%p)\n", sb); | 1541 | mlog_entry("(0x%p)\n", sb); |
1552 | 1542 | ||
1543 | lock_kernel(); | ||
1544 | |||
1553 | ocfs2_sync_blockdev(sb); | 1545 | ocfs2_sync_blockdev(sb); |
1554 | ocfs2_dismount_volume(sb, 0); | 1546 | ocfs2_dismount_volume(sb, 0); |
1555 | 1547 | ||
1548 | unlock_kernel(); | ||
1549 | |||
1556 | mlog_exit_void(); | 1550 | mlog_exit_void(); |
1557 | } | 1551 | } |
1558 | 1552 | ||
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 834b2331f6b3..d17e774eaf45 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
@@ -11,21 +11,6 @@ | |||
11 | #include <linux/mpage.h> | 11 | #include <linux/mpage.h> |
12 | #include "omfs.h" | 12 | #include "omfs.h" |
13 | 13 | ||
14 | static int omfs_sync_file(struct file *file, struct dentry *dentry, | ||
15 | int datasync) | ||
16 | { | ||
17 | struct inode *inode = dentry->d_inode; | ||
18 | int err; | ||
19 | |||
20 | err = sync_mapping_buffers(inode->i_mapping); | ||
21 | if (!(inode->i_state & I_DIRTY)) | ||
22 | return err; | ||
23 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
24 | return err; | ||
25 | err |= omfs_sync_inode(inode); | ||
26 | return err ? -EIO : 0; | ||
27 | } | ||
28 | |||
29 | static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset) | 14 | static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset) |
30 | { | 15 | { |
31 | return (sbi->s_sys_blocksize - offset - | 16 | return (sbi->s_sys_blocksize - offset - |
@@ -344,7 +329,7 @@ struct file_operations omfs_file_operations = { | |||
344 | .aio_read = generic_file_aio_read, | 329 | .aio_read = generic_file_aio_read, |
345 | .aio_write = generic_file_aio_write, | 330 | .aio_write = generic_file_aio_write, |
346 | .mmap = generic_file_mmap, | 331 | .mmap = generic_file_mmap, |
347 | .fsync = omfs_sync_file, | 332 | .fsync = simple_fsync, |
348 | .splice_read = generic_file_splice_read, | 333 | .splice_read = generic_file_splice_read, |
349 | }; | 334 | }; |
350 | 335 | ||
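
The generic helper omfs switches to lives in fs/libfs.c and does essentially what the deleted omfs_sync_file() did; a hedged sketch of its shape (the in-tree definition is authoritative):

	int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
	{
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = 0,	/* metadata-only writeback */
		};
		struct inode *inode = dentry->d_inode;
		int err, ret;

		ret = sync_mapping_buffers(inode->i_mapping);
		if (!(inode->i_state & I_DIRTY))
			return ret;
		if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
			return ret;

		err = sync_inode(inode, &wbc);
		if (!ret)
			ret = err;
		return ret;
	}
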
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
@@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) | |||
612 | 612 | ||
613 | audit_inode(NULL, dentry); | 613 | audit_inode(NULL, dentry); |
614 | 614 | ||
615 | err = mnt_want_write(file->f_path.mnt); | 615 | err = mnt_want_write_file(file); |
616 | if (err) | 616 | if (err) |
617 | goto out_putf; | 617 | goto out_putf; |
618 | mutex_lock(&inode->i_mutex); | 618 | mutex_lock(&inode->i_mutex); |
@@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) | |||
761 | if (!file) | 761 | if (!file) |
762 | goto out; | 762 | goto out; |
763 | 763 | ||
764 | error = mnt_want_write(file->f_path.mnt); | 764 | error = mnt_want_write_file(file); |
765 | if (error) | 765 | if (error) |
766 | goto out_fput; | 766 | goto out_fput; |
767 | dentry = file->f_path.dentry; | 767 | dentry = file->f_path.dentry; |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 99e33ef40be4..1a9c7878f864 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -219,6 +219,13 @@ ssize_t part_size_show(struct device *dev, | |||
219 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); | 219 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
220 | } | 220 | } |
221 | 221 | ||
222 | ssize_t part_alignment_offset_show(struct device *dev, | ||
223 | struct device_attribute *attr, char *buf) | ||
224 | { | ||
225 | struct hd_struct *p = dev_to_part(dev); | ||
226 | return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); | ||
227 | } | ||
228 | |||
222 | ssize_t part_stat_show(struct device *dev, | 229 | ssize_t part_stat_show(struct device *dev, |
223 | struct device_attribute *attr, char *buf) | 230 | struct device_attribute *attr, char *buf) |
224 | { | 231 | { |
@@ -272,6 +279,7 @@ ssize_t part_fail_store(struct device *dev, | |||
272 | static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); | 279 | static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); |
273 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); | 280 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); |
274 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); | 281 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); |
282 | static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); | ||
275 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); | 283 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); |
276 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 284 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
277 | static struct device_attribute dev_attr_fail = | 285 | static struct device_attribute dev_attr_fail = |
@@ -282,6 +290,7 @@ static struct attribute *part_attrs[] = { | |||
282 | &dev_attr_partition.attr, | 290 | &dev_attr_partition.attr, |
283 | &dev_attr_start.attr, | 291 | &dev_attr_start.attr, |
284 | &dev_attr_size.attr, | 292 | &dev_attr_size.attr, |
293 | &dev_attr_alignment_offset.attr, | ||
285 | &dev_attr_stat.attr, | 294 | &dev_attr_stat.attr, |
286 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 295 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
287 | &dev_attr_fail.attr, | 296 | &dev_attr_fail.attr, |
@@ -383,6 +392,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
383 | pdev = part_to_dev(p); | 392 | pdev = part_to_dev(p); |
384 | 393 | ||
385 | p->start_sect = start; | 394 | p->start_sect = start; |
395 | p->alignment_offset = queue_sector_alignment_offset(disk->queue, start); | ||
386 | p->nr_sects = len; | 396 | p->nr_sects = len; |
387 | p->partno = partno; | 397 | p->partno = partno; |
388 | p->policy = get_disk_ro(disk); | 398 | p->policy = get_disk_ro(disk); |
@@ -546,27 +556,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
546 | 556 | ||
547 | /* add partitions */ | 557 | /* add partitions */ |
548 | for (p = 1; p < state->limit; p++) { | 558 | for (p = 1; p < state->limit; p++) { |
549 | sector_t size = state->parts[p].size; | 559 | sector_t size, from; |
550 | sector_t from = state->parts[p].from; | 560 | try_scan: |
561 | size = state->parts[p].size; | ||
551 | if (!size) | 562 | if (!size) |
552 | continue; | 563 | continue; |
564 | |||
565 | from = state->parts[p].from; | ||
553 | if (from >= get_capacity(disk)) { | 566 | if (from >= get_capacity(disk)) { |
554 | printk(KERN_WARNING | 567 | printk(KERN_WARNING |
555 | "%s: p%d ignored, start %llu is behind the end of the disk\n", | 568 | "%s: p%d ignored, start %llu is behind the end of the disk\n", |
556 | disk->disk_name, p, (unsigned long long) from); | 569 | disk->disk_name, p, (unsigned long long) from); |
557 | continue; | 570 | continue; |
558 | } | 571 | } |
572 | |||
559 | if (from + size > get_capacity(disk)) { | 573 | if (from + size > get_capacity(disk)) { |
560 | /* | 574 | struct block_device_operations *bdops = disk->fops; |
561 | * we can not ignore partitions of broken tables | 575 | unsigned long long capacity; |
562 | * created by for example camera firmware, but we | 576 | |
563 | * limit them to the end of the disk to avoid | ||
564 | * creating invalid block devices | ||
565 | */ | ||
566 | printk(KERN_WARNING | 577 | printk(KERN_WARNING |
567 | "%s: p%d size %llu limited to end of disk\n", | 578 | "%s: p%d size %llu exceeds device capacity, ", |
568 | disk->disk_name, p, (unsigned long long) size); | 579 | disk->disk_name, p, (unsigned long long) size); |
569 | size = get_capacity(disk) - from; | 580 | |
581 | if (bdops->set_capacity && | ||
582 | (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) { | ||
583 | printk(KERN_CONT "enabling native capacity\n"); | ||
584 | capacity = bdops->set_capacity(disk, ~0ULL); | ||
585 | disk->flags |= GENHD_FL_NATIVE_CAPACITY; | ||
586 | if (capacity > get_capacity(disk)) { | ||
587 | set_capacity(disk, capacity); | ||
588 | check_disk_size_change(disk, bdev); | ||
589 | bdev->bd_invalidated = 0; | ||
590 | } | ||
591 | goto try_scan; | ||
592 | } else { | ||
593 | /* | ||
594 | * we can not ignore partitions of broken tables | ||
595 | * created by for example camera firmware, but | ||
596 | * we limit them to the end of the disk to avoid | ||
597 | * creating invalid block devices | ||
598 | */ | ||
599 | printk(KERN_CONT "limited to end of disk\n"); | ||
600 | size = get_capacity(disk) - from; | ||
601 | } | ||
570 | } | 602 | } |
571 | part = add_partition(disk, p, from, size, | 603 | part = add_partition(disk, p, from, size, |
572 | state->parts[p].flags); | 604 | state->parts[p].flags); |
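Rather than clipping straight away, the rescan loop now tries once to unlock the drive's native capacity (remembered via GENHD_FL_NATIVE_CAPACITY) and rescans before falling back to limiting the partition to the end of the disk. The control flow in isolation, as a runnable sketch whose numbers are illustrative stand-ins for the disk state:

        #include <stdbool.h>
        #include <stdio.h>

        int main(void)
        {
                unsigned long long capacity = 100;         /* clipped size, e.g. by an HPA */
                const unsigned long long native = 200;     /* what set_capacity() could unlock */
                unsigned long long start = 50, size = 120; /* partition table entry */
                bool grown = false;                        /* plays GENHD_FL_NATIVE_CAPACITY */

                while (start + size > capacity) {
                        if (!grown) {          /* mirrors the set_capacity() branch */
                                printf("enabling native capacity\n");
                                capacity = native;
                                grown = true;  /* only one attempt, then give up */
                                continue;      /* mirrors "goto try_scan" */
                        }
                        printf("limited to end of disk\n");
                        size = capacity - start;
                }
                printf("p1: start=%llu size=%llu\n", start, size);
                return 0;
        }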
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 46297683cd34..fc71aab08460 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c | |||
@@ -76,7 +76,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
76 | Sector sect; | 76 | Sector sect; |
77 | 77 | ||
78 | res = 0; | 78 | res = 0; |
79 | blocksize = bdev_hardsect_size(bdev); | 79 | blocksize = bdev_logical_block_size(bdev); |
80 | if (blocksize <= 0) | 80 | if (blocksize <= 0) |
81 | goto out_exit; | 81 | goto out_exit; |
82 | i_size = i_size_read(bdev->bd_inode); | 82 | i_size = i_size_read(bdev->bd_inode); |
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 796511886f28..0028d2ef0662 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c | |||
@@ -110,7 +110,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev, | |||
110 | Sector sect; | 110 | Sector sect; |
111 | unsigned char *data; | 111 | unsigned char *data; |
112 | u32 this_sector, this_size; | 112 | u32 this_sector, this_size; |
113 | int sector_size = bdev_hardsect_size(bdev) / 512; | 113 | int sector_size = bdev_logical_block_size(bdev) / 512; |
114 | int loopct = 0; /* number of links followed | 114 | int loopct = 0; /* number of links followed |
115 | without finding a data partition */ | 115 | without finding a data partition */ |
116 | int i; | 116 | int i; |
@@ -415,7 +415,7 @@ static struct { | |||
415 | 415 | ||
416 | int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | 416 | int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) |
417 | { | 417 | { |
418 | int sector_size = bdev_hardsect_size(bdev) / 512; | 418 | int sector_size = bdev_logical_block_size(bdev) / 512; |
419 | Sector sect; | 419 | Sector sect; |
420 | unsigned char *data; | 420 | unsigned char *data; |
421 | struct partition *p; | 421 | struct partition *p; |
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
@@ -302,6 +302,20 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info, | |||
302 | return 0; | 302 | return 0; |
303 | } | 303 | } |
304 | 304 | ||
305 | /** | ||
306 | * generic_pipe_buf_release - put a reference to a &struct pipe_buffer | ||
307 | * @pipe: the pipe that the buffer belongs to | ||
308 | * @buf: the buffer to put a reference to | ||
309 | * | ||
310 | * Description: | ||
311 | * This function releases a reference to @buf. | ||
312 | */ | ||
313 | void generic_pipe_buf_release(struct pipe_inode_info *pipe, | ||
314 | struct pipe_buffer *buf) | ||
315 | { | ||
316 | page_cache_release(buf->page); | ||
317 | } | ||
318 | |||
305 | static const struct pipe_buf_operations anon_pipe_buf_ops = { | 319 | static const struct pipe_buf_operations anon_pipe_buf_ops = { |
306 | .can_merge = 1, | 320 | .can_merge = 1, |
307 | .map = generic_pipe_buf_map, | 321 | .map = generic_pipe_buf_map, |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 3326bbf9ab95..1539e630c47d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -2128,9 +2128,15 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | |||
2128 | if (copy_from_user(page, buf, count)) | 2128 | if (copy_from_user(page, buf, count)) |
2129 | goto out_free; | 2129 | goto out_free; |
2130 | 2130 | ||
2131 | /* Guard against adverse ptrace interaction */ | ||
2132 | length = mutex_lock_interruptible(&task->cred_guard_mutex); | ||
2133 | if (length < 0) | ||
2134 | goto out_free; | ||
2135 | |||
2131 | length = security_setprocattr(task, | 2136 | length = security_setprocattr(task, |
2132 | (char*)file->f_path.dentry->d_name.name, | 2137 | (char*)file->f_path.dentry->d_name.name, |
2133 | (void*)page, count); | 2138 | (void*)page, count); |
2139 | mutex_unlock(&task->cred_guard_mutex); | ||
2134 | out_free: | 2140 | out_free: |
2135 | free_page((unsigned long) page); | 2141 | free_page((unsigned long) page); |
2136 | out: | 2142 | out: |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index f6db9618a888..753ca37002c8 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -92,3 +92,28 @@ struct pde_opener { | |||
92 | struct list_head lh; | 92 | struct list_head lh; |
93 | }; | 93 | }; |
94 | void pde_users_dec(struct proc_dir_entry *pde); | 94 | void pde_users_dec(struct proc_dir_entry *pde); |
95 | |||
96 | extern spinlock_t proc_subdir_lock; | ||
97 | |||
98 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); | ||
99 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); | ||
100 | unsigned long task_vsize(struct mm_struct *); | ||
101 | int task_statm(struct mm_struct *, int *, int *, int *, int *); | ||
102 | void task_mem(struct seq_file *, struct mm_struct *); | ||
103 | |||
104 | struct proc_dir_entry *de_get(struct proc_dir_entry *de); | ||
105 | void de_put(struct proc_dir_entry *de); | ||
106 | |||
107 | extern struct vfsmount *proc_mnt; | ||
108 | int proc_fill_super(struct super_block *); | ||
109 | struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); | ||
110 | |||
111 | /* | ||
112 | * These are generic /proc routines that use the internal | ||
113 | * "struct proc_dir_entry" tree to traverse the filesystem. | ||
114 | * | ||
115 | * The /proc root directory has extended versions to take care | ||
116 | * of the /proc/<pid> subdirectories. | ||
117 | */ | ||
118 | int proc_readdir(struct file *, void *, filldir_t); | ||
119 | struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); | ||
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index 9bca39cf99ee..1afa4dd4cae2 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c | |||
@@ -12,20 +12,14 @@ | |||
12 | 12 | ||
13 | static int loadavg_proc_show(struct seq_file *m, void *v) | 13 | static int loadavg_proc_show(struct seq_file *m, void *v) |
14 | { | 14 | { |
15 | int a, b, c; | 15 | unsigned long avnrun[3]; |
16 | unsigned long seq; | ||
17 | 16 | ||
18 | do { | 17 | get_avenrun(avnrun, FIXED_1/200, 0); |
19 | seq = read_seqbegin(&xtime_lock); | ||
20 | a = avenrun[0] + (FIXED_1/200); | ||
21 | b = avenrun[1] + (FIXED_1/200); | ||
22 | c = avenrun[2] + (FIXED_1/200); | ||
23 | } while (read_seqretry(&xtime_lock, seq)); | ||
24 | 18 | ||
25 | seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", | 19 | seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n", |
26 | LOAD_INT(a), LOAD_FRAC(a), | 20 | LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]), |
27 | LOAD_INT(b), LOAD_FRAC(b), | 21 | LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), |
28 | LOAD_INT(c), LOAD_FRAC(c), | 22 | LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), |
29 | nr_running(), nr_threads, | 23 | nr_running(), nr_threads, |
30 | task_active_pid_ns(current)->last_pid); | 24 | task_active_pid_ns(current)->last_pid); |
31 | return 0; | 25 | return 0; |
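LOAD_INT() and LOAD_FRAC() decode the kernel's FSHIFT fixed-point load averages, and the FIXED_1/200 offset that get_avenrun() applies here rounds the printed value to the nearest hundredth. A standalone decoder, with the macro definitions as they appear in the kernel headers of this era:

        #include <stdio.h>

        #define FSHIFT   11                    /* bits of fractional precision */
        #define FIXED_1  (1 << FSHIFT)         /* 1.0 in fixed point */
        #define LOAD_INT(x)  ((x) >> FSHIFT)
        #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

        int main(void)
        {
                /* 1.50 in fixed point, plus the FIXED_1/200 rounding offset */
                unsigned long avnrun = 3 * FIXED_1 / 2 + FIXED_1 / 200;

                printf("%lu.%02lu\n", LOAD_INT(avnrun), LOAD_FRAC(avnrun)); /* 1.50 */
                return 0;
        }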
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index de2bba5a3440..fc6c3025befd 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <asm/prom.h> | 12 | #include <asm/prom.h> |
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | #include "internal.h" | ||
14 | 15 | ||
15 | #ifndef HAVE_ARCH_DEVTREE_FIXUPS | 16 | #ifndef HAVE_ARCH_DEVTREE_FIXUPS |
16 | static inline void set_node_proc_entry(struct device_node *np, | 17 | static inline void set_node_proc_entry(struct device_node *np, |
diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile index 502d7fe98bab..e4d408cc5473 100644 --- a/fs/qnx4/Makefile +++ b/fs/qnx4/Makefile | |||
@@ -4,4 +4,4 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_QNX4FS_FS) += qnx4.o | 5 | obj-$(CONFIG_QNX4FS_FS) += qnx4.o |
6 | 6 | ||
7 | qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o | 7 | qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o |
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index 8425cf6e9624..e1cd061a25f7 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c | |||
@@ -13,14 +13,9 @@ | |||
13 | * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) . | 13 | * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) . |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/time.h> | ||
17 | #include <linux/fs.h> | ||
18 | #include <linux/qnx4_fs.h> | ||
19 | #include <linux/stat.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
23 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
18 | #include "qnx4.h" | ||
24 | 19 | ||
25 | #if 0 | 20 | #if 0 |
26 | int qnx4_new_block(struct super_block *sb) | 21 | int qnx4_new_block(struct super_block *sb) |
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index ea9ffefb48ad..003c68f3238b 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c | |||
@@ -11,14 +11,9 @@ | |||
11 | * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. | 11 | * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/fs.h> | ||
17 | #include <linux/qnx4_fs.h> | ||
18 | #include <linux/stat.h> | ||
19 | #include <linux/smp_lock.h> | 14 | #include <linux/smp_lock.h> |
20 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
21 | 16 | #include "qnx4.h" | |
22 | 17 | ||
23 | static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) | 18 | static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) |
24 | { | 19 | { |
@@ -84,7 +79,7 @@ const struct file_operations qnx4_dir_operations = | |||
84 | { | 79 | { |
85 | .read = generic_read_dir, | 80 | .read = generic_read_dir, |
86 | .readdir = qnx4_readdir, | 81 | .readdir = qnx4_readdir, |
87 | .fsync = file_fsync, | 82 | .fsync = simple_fsync, |
88 | }; | 83 | }; |
89 | 84 | ||
90 | const struct inode_operations qnx4_dir_inode_operations = | 85 | const struct inode_operations qnx4_dir_inode_operations = |
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index 867f42b02035..09b170ac936c 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c | |||
@@ -12,8 +12,7 @@ | |||
12 | * 27-06-1998 by Frank Denis : file overwriting. | 12 | * 27-06-1998 by Frank Denis : file overwriting. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/fs.h> | 15 | #include "qnx4.h" |
16 | #include <linux/qnx4_fs.h> | ||
17 | 16 | ||
18 | /* | 17 | /* |
19 | * We have mostly NULL's here: the current defaults are ok for | 18 | * We have mostly NULL's here: the current defaults are ok for |
@@ -29,7 +28,7 @@ const struct file_operations qnx4_file_operations = | |||
29 | #ifdef CONFIG_QNX4FS_RW | 28 | #ifdef CONFIG_QNX4FS_RW |
30 | .write = do_sync_write, | 29 | .write = do_sync_write, |
31 | .aio_write = generic_file_aio_write, | 30 | .aio_write = generic_file_aio_write, |
32 | .fsync = qnx4_sync_file, | 31 | .fsync = simple_fsync, |
33 | #endif | 32 | #endif |
34 | }; | 33 | }; |
35 | 34 | ||
diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c deleted file mode 100644 index aa3b19544bee..000000000000 --- a/fs/qnx4/fsync.c +++ /dev/null | |||
@@ -1,169 +0,0 @@ | |||
1 | /* | ||
2 | * QNX4 file system, Linux implementation. | ||
3 | * | ||
4 | * Version : 0.1 | ||
5 | * | ||
6 | * Using parts of the xiafs filesystem. | ||
7 | * | ||
8 | * History : | ||
9 | * | ||
10 | * 24-03-1998 by Richard Frowijn : first release. | ||
11 | */ | ||
12 | |||
13 | #include <linux/errno.h> | ||
14 | #include <linux/time.h> | ||
15 | #include <linux/stat.h> | ||
16 | #include <linux/fcntl.h> | ||
17 | #include <linux/smp_lock.h> | ||
18 | #include <linux/buffer_head.h> | ||
19 | |||
20 | #include <linux/fs.h> | ||
21 | #include <linux/qnx4_fs.h> | ||
22 | |||
23 | #include <asm/system.h> | ||
24 | |||
25 | /* | ||
26 | * The functions for qnx4 fs file synchronization. | ||
27 | */ | ||
28 | |||
29 | #ifdef CONFIG_QNX4FS_RW | ||
30 | |||
31 | static int sync_block(struct inode *inode, unsigned short *block, int wait) | ||
32 | { | ||
33 | struct buffer_head *bh; | ||
34 | unsigned short tmp; | ||
35 | |||
36 | if (!*block) | ||
37 | return 0; | ||
38 | tmp = *block; | ||
39 | bh = sb_find_get_block(inode->i_sb, *block); | ||
40 | if (!bh) | ||
41 | return 0; | ||
42 | if (*block != tmp) { | ||
43 | brelse(bh); | ||
44 | return 1; | ||
45 | } | ||
46 | if (wait && buffer_req(bh) && !buffer_uptodate(bh)) { | ||
47 | brelse(bh); | ||
48 | return -1; | ||
49 | } | ||
50 | if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) { | ||
51 | brelse(bh); | ||
52 | return 0; | ||
53 | } | ||
54 | ll_rw_block(WRITE, 1, &bh); | ||
55 | atomic_dec(&bh->b_count); | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | #ifdef WTF | ||
60 | static int sync_iblock(struct inode *inode, unsigned short *iblock, | ||
61 | struct buffer_head **bh, int wait) | ||
62 | { | ||
63 | int rc; | ||
64 | unsigned short tmp; | ||
65 | |||
66 | *bh = NULL; | ||
67 | tmp = *iblock; | ||
68 | if (!tmp) | ||
69 | return 0; | ||
70 | rc = sync_block(inode, iblock, wait); | ||
71 | if (rc) | ||
72 | return rc; | ||
73 | *bh = sb_bread(inode->i_sb, tmp); | ||
74 | if (tmp != *iblock) { | ||
75 | brelse(*bh); | ||
76 | *bh = NULL; | ||
77 | return 1; | ||
78 | } | ||
79 | if (!*bh) | ||
80 | return -1; | ||
81 | return 0; | ||
82 | } | ||
83 | #endif | ||
84 | |||
85 | static int sync_direct(struct inode *inode, int wait) | ||
86 | { | ||
87 | int i; | ||
88 | int rc, err = 0; | ||
89 | |||
90 | for (i = 0; i < 7; i++) { | ||
91 | rc = sync_block(inode, | ||
92 | (unsigned short *) qnx4_raw_inode(inode)->di_first_xtnt.xtnt_blk + i, wait); | ||
93 | if (rc > 0) | ||
94 | break; | ||
95 | if (rc) | ||
96 | err = rc; | ||
97 | } | ||
98 | return err; | ||
99 | } | ||
100 | |||
101 | #ifdef WTF | ||
102 | static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait) | ||
103 | { | ||
104 | int i; | ||
105 | struct buffer_head *ind_bh; | ||
106 | int rc, err = 0; | ||
107 | |||
108 | rc = sync_iblock(inode, iblock, &ind_bh, wait); | ||
109 | if (rc || !ind_bh) | ||
110 | return rc; | ||
111 | |||
112 | for (i = 0; i < 512; i++) { | ||
113 | rc = sync_block(inode, | ||
114 | ((unsigned short *) ind_bh->b_data) + i, | ||
115 | wait); | ||
116 | if (rc > 0) | ||
117 | break; | ||
118 | if (rc) | ||
119 | err = rc; | ||
120 | } | ||
121 | brelse(ind_bh); | ||
122 | return err; | ||
123 | } | ||
124 | |||
125 | static int sync_dindirect(struct inode *inode, unsigned short *diblock, | ||
126 | int wait) | ||
127 | { | ||
128 | int i; | ||
129 | struct buffer_head *dind_bh; | ||
130 | int rc, err = 0; | ||
131 | |||
132 | rc = sync_iblock(inode, diblock, &dind_bh, wait); | ||
133 | if (rc || !dind_bh) | ||
134 | return rc; | ||
135 | |||
136 | for (i = 0; i < 512; i++) { | ||
137 | rc = sync_indirect(inode, | ||
138 | ((unsigned short *) dind_bh->b_data) + i, | ||
139 | wait); | ||
140 | if (rc > 0) | ||
141 | break; | ||
142 | if (rc) | ||
143 | err = rc; | ||
144 | } | ||
145 | brelse(dind_bh); | ||
146 | return err; | ||
147 | } | ||
148 | #endif | ||
149 | |||
150 | int qnx4_sync_file(struct file *file, struct dentry *dentry, int unused) | ||
151 | { | ||
152 | struct inode *inode = dentry->d_inode; | ||
153 | int wait, err = 0; | ||
154 | |||
155 | (void) file; | ||
156 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
157 | S_ISLNK(inode->i_mode))) | ||
158 | return -EINVAL; | ||
159 | |||
160 | lock_kernel(); | ||
161 | for (wait = 0; wait <= 1; wait++) { | ||
162 | err |= sync_direct(inode, wait); | ||
163 | } | ||
164 | err |= qnx4_sync_inode(inode); | ||
165 | unlock_kernel(); | ||
166 | return (err < 0) ? -EIO : 0; | ||
167 | } | ||
168 | |||
169 | #endif | ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index fe1f0f31d11c..681df5fcd161 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -13,19 +13,15 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/types.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/qnx4_fs.h> | ||
22 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/slab.h> | ||
23 | #include <linux/highuid.h> | 18 | #include <linux/highuid.h> |
24 | #include <linux/smp_lock.h> | 19 | #include <linux/smp_lock.h> |
25 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
26 | #include <linux/buffer_head.h> | 21 | #include <linux/buffer_head.h> |
27 | #include <linux/vfs.h> | 22 | #include <linux/writeback.h> |
28 | #include <asm/uaccess.h> | 23 | #include <linux/statfs.h> |
24 | #include "qnx4.h" | ||
29 | 25 | ||
30 | #define QNX4_VERSION 4 | 26 | #define QNX4_VERSION 4 |
31 | #define QNX4_BMNAME ".bitmap" | 27 | #define QNX4_BMNAME ".bitmap" |
@@ -34,31 +30,6 @@ static const struct super_operations qnx4_sops; | |||
34 | 30 | ||
35 | #ifdef CONFIG_QNX4FS_RW | 31 | #ifdef CONFIG_QNX4FS_RW |
36 | 32 | ||
37 | int qnx4_sync_inode(struct inode *inode) | ||
38 | { | ||
39 | int err = 0; | ||
40 | # if 0 | ||
41 | struct buffer_head *bh; | ||
42 | |||
43 | bh = qnx4_update_inode(inode); | ||
44 | if (bh && buffer_dirty(bh)) | ||
45 | { | ||
46 | sync_dirty_buffer(bh); | ||
47 | if (buffer_req(bh) && !buffer_uptodate(bh)) | ||
48 | { | ||
49 | printk ("IO error syncing qnx4 inode [%s:%08lx]\n", | ||
50 | inode->i_sb->s_id, inode->i_ino); | ||
51 | err = -1; | ||
52 | } | ||
53 | brelse (bh); | ||
54 | } else if (!bh) { | ||
55 | err = -1; | ||
56 | } | ||
57 | # endif | ||
58 | |||
59 | return err; | ||
60 | } | ||
61 | |||
62 | static void qnx4_delete_inode(struct inode *inode) | 33 | static void qnx4_delete_inode(struct inode *inode) |
63 | { | 34 | { |
64 | QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); | 35 | QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); |
@@ -70,15 +41,7 @@ static void qnx4_delete_inode(struct inode *inode) | |||
70 | unlock_kernel(); | 41 | unlock_kernel(); |
71 | } | 42 | } |
72 | 43 | ||
73 | static void qnx4_write_super(struct super_block *sb) | 44 | static int qnx4_write_inode(struct inode *inode, int do_sync) |
74 | { | ||
75 | lock_kernel(); | ||
76 | QNX4DEBUG(("qnx4: write_super\n")); | ||
77 | sb->s_dirt = 0; | ||
78 | unlock_kernel(); | ||
79 | } | ||
80 | |||
81 | static int qnx4_write_inode(struct inode *inode, int unused) | ||
82 | { | 45 | { |
83 | struct qnx4_inode_entry *raw_inode; | 46 | struct qnx4_inode_entry *raw_inode; |
84 | int block, ino; | 47 | int block, ino; |
@@ -115,6 +78,16 @@ static int qnx4_write_inode(struct inode *inode, int unused) | |||
115 | raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | 78 | raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec); |
116 | raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks); | 79 | raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks); |
117 | mark_buffer_dirty(bh); | 80 | mark_buffer_dirty(bh); |
81 | if (do_sync) { | ||
82 | sync_dirty_buffer(bh); | ||
83 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | ||
84 | printk("qnx4: IO error syncing inode [%s:%08x]\n", | ||
85 | inode->i_sb->s_id, ino); | ||
86 | brelse(bh); | ||
87 | unlock_kernel(); | ||
88 | return -EIO; | ||
89 | } | ||
90 | } | ||
118 | brelse(bh); | 91 | brelse(bh); |
119 | unlock_kernel(); | 92 | unlock_kernel(); |
120 | return 0; | 93 | return 0; |
@@ -138,7 +111,6 @@ static const struct super_operations qnx4_sops = | |||
138 | #ifdef CONFIG_QNX4FS_RW | 111 | #ifdef CONFIG_QNX4FS_RW |
139 | .write_inode = qnx4_write_inode, | 112 | .write_inode = qnx4_write_inode, |
140 | .delete_inode = qnx4_delete_inode, | 113 | .delete_inode = qnx4_delete_inode, |
141 | .write_super = qnx4_write_super, | ||
142 | #endif | 114 | #endif |
143 | }; | 115 | }; |
144 | 116 | ||
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 775eed3a4085..5972ed214937 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c | |||
@@ -12,16 +12,9 @@ | |||
12 | * 04-07-1998 by Frank Denis : first step for rmdir/unlink. | 12 | * 04-07-1998 by Frank Denis : first step for rmdir/unlink. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/time.h> | ||
16 | #include <linux/fs.h> | ||
17 | #include <linux/qnx4_fs.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/string.h> | ||
20 | #include <linux/stat.h> | ||
21 | #include <linux/fcntl.h> | ||
22 | #include <linux/errno.h> | ||
23 | #include <linux/smp_lock.h> | 15 | #include <linux/smp_lock.h> |
24 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
17 | #include "qnx4.h" | ||
25 | 18 | ||
26 | 19 | ||
27 | /* | 20 | /* |
@@ -187,7 +180,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry) | |||
187 | de->di_status = 0; | 180 | de->di_status = 0; |
188 | memset(de->di_fname, 0, sizeof de->di_fname); | 181 | memset(de->di_fname, 0, sizeof de->di_fname); |
189 | de->di_mode = 0; | 182 | de->di_mode = 0; |
190 | mark_buffer_dirty(bh); | 183 | mark_buffer_dirty_inode(bh, dir); |
191 | clear_nlink(inode); | 184 | clear_nlink(inode); |
192 | mark_inode_dirty(inode); | 185 | mark_inode_dirty(inode); |
193 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; | 186 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; |
@@ -232,7 +225,7 @@ int qnx4_unlink(struct inode *dir, struct dentry *dentry) | |||
232 | de->di_status = 0; | 225 | de->di_status = 0; |
233 | memset(de->di_fname, 0, sizeof de->di_fname); | 226 | memset(de->di_fname, 0, sizeof de->di_fname); |
234 | de->di_mode = 0; | 227 | de->di_mode = 0; |
235 | mark_buffer_dirty(bh); | 228 | mark_buffer_dirty_inode(bh, dir); |
236 | dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; | 229 | dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; |
237 | mark_inode_dirty(dir); | 230 | mark_inode_dirty(dir); |
238 | inode->i_ctime = dir->i_ctime; | 231 | inode->i_ctime = dir->i_ctime; |
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h new file mode 100644 index 000000000000..9efc089454f6 --- /dev/null +++ b/fs/qnx4/qnx4.h | |||
@@ -0,0 +1,57 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/qnx4_fs.h> | ||
3 | |||
4 | #define QNX4_DEBUG 0 | ||
5 | |||
6 | #if QNX4_DEBUG | ||
7 | #define QNX4DEBUG(X) printk X | ||
8 | #else | ||
9 | #define QNX4DEBUG(X) (void) 0 | ||
10 | #endif | ||
11 | |||
12 | struct qnx4_sb_info { | ||
13 | struct buffer_head *sb_buf; /* superblock buffer */ | ||
14 | struct qnx4_super_block *sb; /* our superblock */ | ||
15 | unsigned int Version; /* may be useful */ | ||
16 | struct qnx4_inode_entry *BitMap; /* useful */ | ||
17 | }; | ||
18 | |||
19 | struct qnx4_inode_info { | ||
20 | struct qnx4_inode_entry raw; | ||
21 | loff_t mmu_private; | ||
22 | struct inode vfs_inode; | ||
23 | }; | ||
24 | |||
25 | extern struct inode *qnx4_iget(struct super_block *, unsigned long); | ||
26 | extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); | ||
27 | extern unsigned long qnx4_count_free_blocks(struct super_block *sb); | ||
28 | extern unsigned long qnx4_block_map(struct inode *inode, long iblock); | ||
29 | |||
30 | extern struct buffer_head *qnx4_bread(struct inode *, int, int); | ||
31 | |||
32 | extern const struct inode_operations qnx4_file_inode_operations; | ||
33 | extern const struct inode_operations qnx4_dir_inode_operations; | ||
34 | extern const struct file_operations qnx4_file_operations; | ||
35 | extern const struct file_operations qnx4_dir_operations; | ||
36 | extern int qnx4_is_free(struct super_block *sb, long block); | ||
37 | extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); | ||
38 | extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); | ||
39 | extern void qnx4_truncate(struct inode *inode); | ||
40 | extern void qnx4_free_inode(struct inode *inode); | ||
41 | extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); | ||
42 | extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); | ||
43 | |||
44 | static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) | ||
45 | { | ||
46 | return sb->s_fs_info; | ||
47 | } | ||
48 | |||
49 | static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) | ||
50 | { | ||
51 | return container_of(inode, struct qnx4_inode_info, vfs_inode); | ||
52 | } | ||
53 | |||
54 | static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) | ||
55 | { | ||
56 | return &qnx4_i(inode)->raw; | ||
57 | } | ||
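qnx4_i() is the standard container_of() idiom: given a pointer to the embedded vfs_inode, it steps back to the enclosing qnx4_inode_info. The same trick in self-contained form; this is a minimal container_of(), the kernel's version adds a type check:

        #include <stddef.h>
        #include <stdio.h>

        #define container_of(ptr, type, member) \
                ((type *)((char *)(ptr) - offsetof(type, member)))

        struct inode { int dummy; };
        struct qnx4_inode_info { long raw; struct inode vfs_inode; };

        int main(void)
        {
                struct qnx4_inode_info info = { .raw = 42 };
                struct inode *vfs = &info.vfs_inode;
                struct qnx4_inode_info *back =
                        container_of(vfs, struct qnx4_inode_info, vfs_inode);

                printf("raw = %ld\n", back->raw);   /* prints 42 */
                return 0;
        }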
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c index 6437c1c3d1dd..d94d9ee241fe 100644 --- a/fs/qnx4/truncate.c +++ b/fs/qnx4/truncate.c | |||
@@ -10,12 +10,8 @@ | |||
10 | * 30-06-1998 by Frank DENIS : ugly filler. | 10 | * 30-06-1998 by Frank DENIS : ugly filler. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/types.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/qnx4_fs.h> | ||
17 | #include <linux/smp_lock.h> | 13 | #include <linux/smp_lock.h> |
18 | #include <asm/uaccess.h> | 14 | #include "qnx4.h" |
19 | 15 | ||
20 | #ifdef CONFIG_QNX4FS_RW | 16 | #ifdef CONFIG_QNX4FS_RW |
21 | 17 | ||
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index b7f5a468f076..95c5b42384b2 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -159,10 +159,14 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, | |||
159 | return error; | 159 | return error; |
160 | } | 160 | } |
161 | 161 | ||
162 | static void quota_sync_sb(struct super_block *sb, int type) | 162 | #ifdef CONFIG_QUOTA |
163 | void sync_quota_sb(struct super_block *sb, int type) | ||
163 | { | 164 | { |
164 | int cnt; | 165 | int cnt; |
165 | 166 | ||
167 | if (!sb->s_qcop->quota_sync) | ||
168 | return; | ||
169 | |||
166 | sb->s_qcop->quota_sync(sb, type); | 170 | sb->s_qcop->quota_sync(sb, type); |
167 | 171 | ||
168 | if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) | 172 | if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) |
@@ -191,17 +195,13 @@ static void quota_sync_sb(struct super_block *sb, int type) | |||
191 | } | 195 | } |
192 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); | 196 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); |
193 | } | 197 | } |
198 | #endif | ||
194 | 199 | ||
195 | void sync_dquots(struct super_block *sb, int type) | 200 | static void sync_dquots(int type) |
196 | { | 201 | { |
202 | struct super_block *sb; | ||
197 | int cnt; | 203 | int cnt; |
198 | 204 | ||
199 | if (sb) { | ||
200 | if (sb->s_qcop->quota_sync) | ||
201 | quota_sync_sb(sb, type); | ||
202 | return; | ||
203 | } | ||
204 | |||
205 | spin_lock(&sb_lock); | 205 | spin_lock(&sb_lock); |
206 | restart: | 206 | restart: |
207 | list_for_each_entry(sb, &super_blocks, s_list) { | 207 | list_for_each_entry(sb, &super_blocks, s_list) { |
@@ -222,8 +222,8 @@ restart: | |||
222 | sb->s_count++; | 222 | sb->s_count++; |
223 | spin_unlock(&sb_lock); | 223 | spin_unlock(&sb_lock); |
224 | down_read(&sb->s_umount); | 224 | down_read(&sb->s_umount); |
225 | if (sb->s_root && sb->s_qcop->quota_sync) | 225 | if (sb->s_root) |
226 | quota_sync_sb(sb, type); | 226 | sync_quota_sb(sb, type); |
227 | up_read(&sb->s_umount); | 227 | up_read(&sb->s_umount); |
228 | spin_lock(&sb_lock); | 228 | spin_lock(&sb_lock); |
229 | if (__put_super_and_need_restart(sb)) | 229 | if (__put_super_and_need_restart(sb)) |
@@ -301,7 +301,10 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, | |||
301 | return sb->s_qcop->set_dqblk(sb, type, id, &idq); | 301 | return sb->s_qcop->set_dqblk(sb, type, id, &idq); |
302 | } | 302 | } |
303 | case Q_SYNC: | 303 | case Q_SYNC: |
304 | sync_dquots(sb, type); | 304 | if (sb) |
305 | sync_quota_sb(sb, type); | ||
306 | else | ||
307 | sync_dquots(type); | ||
305 | return 0; | 308 | return 0; |
306 | 309 | ||
307 | case Q_XQUOTAON: | 310 | case Q_XQUOTAON: |
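Both paths are reachable from userspace through quotactl(2)'s Q_SYNC: naming a device syncs that filesystem's quotas via sync_quota_sb(), while a null device falls through to sync_dquots() and walks every superblock. A sketch, with the device path as an illustrative assumption:

        #include <stdio.h>
        #include <sys/quota.h>

        int main(void)
        {
                /* one filesystem: the sync_quota_sb() path */
                if (quotactl(QCMD(Q_SYNC, USRQUOTA), "/dev/sda1", 0, NULL))
                        perror("quotactl(/dev/sda1)");

                /* all filesystems: the sync_dquots() path */
                if (quotactl(QCMD(Q_SYNC, USRQUOTA), NULL, 0, NULL))
                        perror("quotactl(all)");
                return 0;
        }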
diff --git a/fs/read_write.c b/fs/read_write.c index 9d1e76bb9ee1..6c8c55dec2bc 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -805,12 +805,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
805 | goto out; | 805 | goto out; |
806 | if (!(in_file->f_mode & FMODE_READ)) | 806 | if (!(in_file->f_mode & FMODE_READ)) |
807 | goto fput_in; | 807 | goto fput_in; |
808 | retval = -EINVAL; | ||
809 | in_inode = in_file->f_path.dentry->d_inode; | ||
810 | if (!in_inode) | ||
811 | goto fput_in; | ||
812 | if (!in_file->f_op || !in_file->f_op->splice_read) | ||
813 | goto fput_in; | ||
814 | retval = -ESPIPE; | 808 | retval = -ESPIPE; |
815 | if (!ppos) | 809 | if (!ppos) |
816 | ppos = &in_file->f_pos; | 810 | ppos = &in_file->f_pos; |
@@ -834,6 +828,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
834 | retval = -EINVAL; | 828 | retval = -EINVAL; |
835 | if (!out_file->f_op || !out_file->f_op->sendpage) | 829 | if (!out_file->f_op || !out_file->f_op->sendpage) |
836 | goto fput_out; | 830 | goto fput_out; |
831 | in_inode = in_file->f_path.dentry->d_inode; | ||
837 | out_inode = out_file->f_path.dentry->d_inode; | 832 | out_inode = out_file->f_path.dentry->d_inode; |
838 | retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); | 833 | retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); |
839 | if (retval < 0) | 834 | if (retval < 0) |
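With the early splice_read check gone, do_sendfile() accepts any readable input file and relies on the default splice path added to splice.c below. Usage is unchanged; a sketch sending a file into a socketpair (the input path is illustrative, and in this era the output fd still has to support sendpage, e.g. a socket):

        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/sendfile.h>
        #include <sys/socket.h>
        #include <unistd.h>

        int main(void)
        {
                int sv[2];
                int in_fd = open("/etc/hostname", O_RDONLY);
                off_t off = 0;
                char buf[128];
                ssize_t n;

                if (in_fd < 0 || socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
                        return 1;
                n = sendfile(sv[0], in_fd, &off, sizeof(buf)); /* file -> socket */
                if (n < 0) {
                        perror("sendfile");
                        return 1;
                }
                n = read(sv[1], buf, (size_t)n);
                printf("%.*s", (int)n, buf);
                return 0;
        }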
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 45ee3d357c70..6d2668fdc384 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
@@ -44,13 +44,11 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, | |||
44 | static inline bool is_privroot_deh(struct dentry *dir, | 44 | static inline bool is_privroot_deh(struct dentry *dir, |
45 | struct reiserfs_de_head *deh) | 45 | struct reiserfs_de_head *deh) |
46 | { | 46 | { |
47 | int ret = 0; | ||
48 | #ifdef CONFIG_REISERFS_FS_XATTR | ||
49 | struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; | 47 | struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; |
50 | ret = (dir == dir->d_parent && privroot->d_inode && | 48 | if (reiserfs_expose_privroot(dir->d_sb)) |
51 | deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); | 49 | return 0; |
52 | #endif | 50 | return (dir == dir->d_parent && privroot->d_inode && |
53 | return ret; | 51 | deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); |
54 | } | 52 | } |
55 | 53 | ||
56 | int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | 54 | int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3567fb9e3fb1..2969773cfc22 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
29 | #include <linux/namei.h> | 29 | #include <linux/namei.h> |
30 | #include <linux/crc32.h> | 30 | #include <linux/crc32.h> |
31 | #include <linux/smp_lock.h> | ||
31 | 32 | ||
32 | struct file_system_type reiserfs_fs_type; | 33 | struct file_system_type reiserfs_fs_type; |
33 | 34 | ||
@@ -64,18 +65,15 @@ static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); | |||
64 | 65 | ||
65 | static int reiserfs_sync_fs(struct super_block *s, int wait) | 66 | static int reiserfs_sync_fs(struct super_block *s, int wait) |
66 | { | 67 | { |
67 | if (!(s->s_flags & MS_RDONLY)) { | 68 | struct reiserfs_transaction_handle th; |
68 | struct reiserfs_transaction_handle th; | 69 | |
69 | reiserfs_write_lock(s); | 70 | reiserfs_write_lock(s); |
70 | if (!journal_begin(&th, s, 1)) | 71 | if (!journal_begin(&th, s, 1)) |
71 | if (!journal_end_sync(&th, s, 1)) | 72 | if (!journal_end_sync(&th, s, 1)) |
72 | reiserfs_flush_old_commits(s); | 73 | reiserfs_flush_old_commits(s); |
73 | s->s_dirt = 0; /* Even if it's not true. | 74 | s->s_dirt = 0; /* Even if it's not true. |
74 | * We'll loop forever in sync_supers otherwise */ | 75 | * We'll loop forever in sync_supers otherwise */ |
75 | reiserfs_write_unlock(s); | 76 | reiserfs_write_unlock(s); |
76 | } else { | ||
77 | s->s_dirt = 0; | ||
78 | } | ||
79 | return 0; | 77 | return 0; |
80 | } | 78 | } |
81 | 79 | ||
@@ -468,6 +466,11 @@ static void reiserfs_put_super(struct super_block *s) | |||
468 | struct reiserfs_transaction_handle th; | 466 | struct reiserfs_transaction_handle th; |
469 | th.t_trans_id = 0; | 467 | th.t_trans_id = 0; |
470 | 468 | ||
469 | lock_kernel(); | ||
470 | |||
471 | if (s->s_dirt) | ||
472 | reiserfs_write_super(s); | ||
473 | |||
471 | /* change file system state to current state if it was mounted with read-write permissions */ | 474 | /* change file system state to current state if it was mounted with read-write permissions */ |
472 | if (!(s->s_flags & MS_RDONLY)) { | 475 | if (!(s->s_flags & MS_RDONLY)) { |
473 | if (!journal_begin(&th, s, 10)) { | 476 | if (!journal_begin(&th, s, 10)) { |
@@ -500,7 +503,7 @@ static void reiserfs_put_super(struct super_block *s) | |||
500 | kfree(s->s_fs_info); | 503 | kfree(s->s_fs_info); |
501 | s->s_fs_info = NULL; | 504 | s->s_fs_info = NULL; |
502 | 505 | ||
503 | return; | 506 | unlock_kernel(); |
504 | } | 507 | } |
505 | 508 | ||
506 | static struct kmem_cache *reiserfs_inode_cachep; | 509 | static struct kmem_cache *reiserfs_inode_cachep; |
@@ -898,6 +901,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin | |||
898 | {"conv",.setmask = 1 << REISERFS_CONVERT}, | 901 | {"conv",.setmask = 1 << REISERFS_CONVERT}, |
899 | {"attrs",.setmask = 1 << REISERFS_ATTRS}, | 902 | {"attrs",.setmask = 1 << REISERFS_ATTRS}, |
900 | {"noattrs",.clrmask = 1 << REISERFS_ATTRS}, | 903 | {"noattrs",.clrmask = 1 << REISERFS_ATTRS}, |
904 | {"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT}, | ||
901 | #ifdef CONFIG_REISERFS_FS_XATTR | 905 | #ifdef CONFIG_REISERFS_FS_XATTR |
902 | {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER}, | 906 | {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER}, |
903 | {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER}, | 907 | {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER}, |
@@ -1193,6 +1197,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1193 | memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); | 1197 | memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); |
1194 | #endif | 1198 | #endif |
1195 | 1199 | ||
1200 | lock_kernel(); | ||
1196 | rs = SB_DISK_SUPER_BLOCK(s); | 1201 | rs = SB_DISK_SUPER_BLOCK(s); |
1197 | 1202 | ||
1198 | if (!reiserfs_parse_options | 1203 | if (!reiserfs_parse_options |
@@ -1315,10 +1320,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1315 | 1320 | ||
1316 | out_ok: | 1321 | out_ok: |
1317 | replace_mount_options(s, new_opts); | 1322 | replace_mount_options(s, new_opts); |
1323 | unlock_kernel(); | ||
1318 | return 0; | 1324 | return 0; |
1319 | 1325 | ||
1320 | out_err: | 1326 | out_err: |
1321 | kfree(new_opts); | 1327 | kfree(new_opts); |
1328 | unlock_kernel(); | ||
1322 | return err; | 1329 | return err; |
1323 | } | 1330 | } |
1324 | 1331 | ||
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8e7deb0e6964..f3d47d856848 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -981,7 +981,8 @@ int reiserfs_lookup_privroot(struct super_block *s) | |||
981 | strlen(PRIVROOT_NAME)); | 981 | strlen(PRIVROOT_NAME)); |
982 | if (!IS_ERR(dentry)) { | 982 | if (!IS_ERR(dentry)) { |
983 | REISERFS_SB(s)->priv_root = dentry; | 983 | REISERFS_SB(s)->priv_root = dentry; |
984 | s->s_root->d_op = &xattr_lookup_poison_ops; | 984 | if (!reiserfs_expose_privroot(s)) |
985 | s->s_root->d_op = &xattr_lookup_poison_ops; | ||
985 | if (dentry->d_inode) | 986 | if (dentry->d_inode) |
986 | dentry->d_inode->i_flags |= S_PRIVATE; | 987 | dentry->d_inode->i_flags |= S_PRIVATE; |
987 | } else | 988 | } else |
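The new expose_privroot option is plumbed through reiserfs_parse_options() above and consulted both in is_privroot_deh() and in the lookup-poisoning setup here, so the xattr private root stays visible when the option is set. Mounting with it from a program; the device and mountpoint are illustrative:

        #include <stdio.h>
        #include <sys/mount.h>

        int main(void)
        {
                /* equivalent to: mount -t reiserfs -o expose_privroot /dev/sdb1 /mnt */
                if (mount("/dev/sdb1", "/mnt", "reiserfs", 0, "expose_privroot")) {
                        perror("mount");
                        return 1;
                }
                return 0;
        }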
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index fc27fbfc5397..1402d2d54f52 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c | |||
@@ -474,6 +474,8 @@ smb_put_super(struct super_block *sb) | |||
474 | { | 474 | { |
475 | struct smb_sb_info *server = SMB_SB(sb); | 475 | struct smb_sb_info *server = SMB_SB(sb); |
476 | 476 | ||
477 | lock_kernel(); | ||
478 | |||
477 | smb_lock_server(server); | 479 | smb_lock_server(server); |
478 | server->state = CONN_INVALID; | 480 | server->state = CONN_INVALID; |
479 | smbiod_unregister_server(server); | 481 | smbiod_unregister_server(server); |
@@ -489,6 +491,8 @@ smb_put_super(struct super_block *sb) | |||
489 | smb_unlock_server(server); | 491 | smb_unlock_server(server); |
490 | put_pid(server->conn_pid); | 492 | put_pid(server->conn_pid); |
491 | kfree(server); | 493 | kfree(server); |
494 | |||
495 | unlock_kernel(); | ||
492 | } | 496 | } |
493 | 497 | ||
494 | static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) | 498 | static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) |
diff --git a/fs/splice.c b/fs/splice.c index 666953d59a35..73766d24f97b 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -507,9 +507,131 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, | |||
507 | 507 | ||
508 | return ret; | 508 | return ret; |
509 | } | 509 | } |
510 | |||
511 | EXPORT_SYMBOL(generic_file_splice_read); | 510 | EXPORT_SYMBOL(generic_file_splice_read); |
512 | 511 | ||
512 | static const struct pipe_buf_operations default_pipe_buf_ops = { | ||
513 | .can_merge = 0, | ||
514 | .map = generic_pipe_buf_map, | ||
515 | .unmap = generic_pipe_buf_unmap, | ||
516 | .confirm = generic_pipe_buf_confirm, | ||
517 | .release = generic_pipe_buf_release, | ||
518 | .steal = generic_pipe_buf_steal, | ||
519 | .get = generic_pipe_buf_get, | ||
520 | }; | ||
521 | |||
522 | static ssize_t kernel_readv(struct file *file, const struct iovec *vec, | ||
523 | unsigned long vlen, loff_t offset) | ||
524 | { | ||
525 | mm_segment_t old_fs; | ||
526 | loff_t pos = offset; | ||
527 | ssize_t res; | ||
528 | |||
529 | old_fs = get_fs(); | ||
530 | set_fs(get_ds()); | ||
531 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
532 | res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos); | ||
533 | set_fs(old_fs); | ||
534 | |||
535 | return res; | ||
536 | } | ||
537 | |||
538 | static ssize_t kernel_write(struct file *file, const char *buf, size_t count, | ||
539 | loff_t pos) | ||
540 | { | ||
541 | mm_segment_t old_fs; | ||
542 | ssize_t res; | ||
543 | |||
544 | old_fs = get_fs(); | ||
545 | set_fs(get_ds()); | ||
546 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
547 | res = vfs_write(file, (const char __user *)buf, count, &pos); | ||
548 | set_fs(old_fs); | ||
549 | |||
550 | return res; | ||
551 | } | ||
552 | |||
553 | ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | ||
554 | struct pipe_inode_info *pipe, size_t len, | ||
555 | unsigned int flags) | ||
556 | { | ||
557 | unsigned int nr_pages; | ||
558 | unsigned int nr_freed; | ||
559 | size_t offset; | ||
560 | struct page *pages[PIPE_BUFFERS]; | ||
561 | struct partial_page partial[PIPE_BUFFERS]; | ||
562 | struct iovec vec[PIPE_BUFFERS]; | ||
563 | pgoff_t index; | ||
564 | ssize_t res; | ||
565 | size_t this_len; | ||
566 | int error; | ||
567 | int i; | ||
568 | struct splice_pipe_desc spd = { | ||
569 | .pages = pages, | ||
570 | .partial = partial, | ||
571 | .flags = flags, | ||
572 | .ops = &default_pipe_buf_ops, | ||
573 | .spd_release = spd_release_page, | ||
574 | }; | ||
575 | |||
576 | index = *ppos >> PAGE_CACHE_SHIFT; | ||
577 | offset = *ppos & ~PAGE_CACHE_MASK; | ||
578 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
579 | |||
580 | for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { | ||
581 | struct page *page; | ||
582 | |||
583 | page = alloc_page(GFP_USER); | ||
584 | error = -ENOMEM; | ||
585 | if (!page) | ||
586 | goto err; | ||
587 | |||
588 | this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); | ||
589 | vec[i].iov_base = (void __user *) page_address(page); | ||
590 | vec[i].iov_len = this_len; | ||
591 | pages[i] = page; | ||
592 | spd.nr_pages++; | ||
593 | len -= this_len; | ||
594 | offset = 0; | ||
595 | } | ||
596 | |||
597 | res = kernel_readv(in, vec, spd.nr_pages, *ppos); | ||
598 | if (res < 0) { | ||
599 | error = res; | ||
600 | goto err; | ||
601 | } | ||
602 | |||
603 | error = 0; | ||
604 | if (!res) | ||
605 | goto err; | ||
606 | |||
607 | nr_freed = 0; | ||
608 | for (i = 0; i < spd.nr_pages; i++) { | ||
609 | this_len = min_t(size_t, vec[i].iov_len, res); | ||
610 | partial[i].offset = 0; | ||
611 | partial[i].len = this_len; | ||
612 | if (!this_len) { | ||
613 | __free_page(pages[i]); | ||
614 | pages[i] = NULL; | ||
615 | nr_freed++; | ||
616 | } | ||
617 | res -= this_len; | ||
618 | } | ||
619 | spd.nr_pages -= nr_freed; | ||
620 | |||
621 | res = splice_to_pipe(pipe, &spd); | ||
622 | if (res > 0) | ||
623 | *ppos += res; | ||
624 | |||
625 | return res; | ||
626 | |||
627 | err: | ||
628 | for (i = 0; i < spd.nr_pages; i++) | ||
629 | __free_page(pages[i]); | ||
630 | |||
631 | return error; | ||
632 | } | ||
633 | EXPORT_SYMBOL(default_file_splice_read); | ||
634 | |||
513 | /* | 635 | /* |
514 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' | 636 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' |
515 | * using sendpage(). Return the number of bytes sent. | 637 | * using sendpage(). Return the number of bytes sent. |
@@ -881,6 +1003,36 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
881 | 1003 | ||
882 | EXPORT_SYMBOL(generic_file_splice_write); | 1004 | EXPORT_SYMBOL(generic_file_splice_write); |
883 | 1005 | ||
1006 | static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | ||
1007 | struct splice_desc *sd) | ||
1008 | { | ||
1009 | int ret; | ||
1010 | void *data; | ||
1011 | |||
1012 | ret = buf->ops->confirm(pipe, buf); | ||
1013 | if (ret) | ||
1014 | return ret; | ||
1015 | |||
1016 | data = buf->ops->map(pipe, buf, 0); | ||
1017 | ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); | ||
1018 | buf->ops->unmap(pipe, buf, data); | ||
1019 | |||
1020 | return ret; | ||
1021 | } | ||
1022 | |||
1023 | static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, | ||
1024 | struct file *out, loff_t *ppos, | ||
1025 | size_t len, unsigned int flags) | ||
1026 | { | ||
1027 | ssize_t ret; | ||
1028 | |||
1029 | ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf); | ||
1030 | if (ret > 0) | ||
1031 | *ppos += ret; | ||
1032 | |||
1033 | return ret; | ||
1034 | } | ||
1035 | |||
884 | /** | 1036 | /** |
885 | * generic_splice_sendpage - splice data from a pipe to a socket | 1037 | * generic_splice_sendpage - splice data from a pipe to a socket |
886 | * @pipe: pipe to splice from | 1038 | * @pipe: pipe to splice from |
@@ -908,11 +1060,10 @@ EXPORT_SYMBOL(generic_splice_sendpage); | |||
908 | static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | 1060 | static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, |
909 | loff_t *ppos, size_t len, unsigned int flags) | 1061 | loff_t *ppos, size_t len, unsigned int flags) |
910 | { | 1062 | { |
1063 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, | ||
1064 | loff_t *, size_t, unsigned int); | ||
911 | int ret; | 1065 | int ret; |
912 | 1066 | ||
913 | if (unlikely(!out->f_op || !out->f_op->splice_write)) | ||
914 | return -EINVAL; | ||
915 | |||
916 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | 1067 | if (unlikely(!(out->f_mode & FMODE_WRITE))) |
917 | return -EBADF; | 1068 | return -EBADF; |
918 | 1069 | ||
@@ -923,7 +1074,11 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
923 | if (unlikely(ret < 0)) | 1074 | if (unlikely(ret < 0)) |
924 | return ret; | 1075 | return ret; |
925 | 1076 | ||
926 | return out->f_op->splice_write(pipe, out, ppos, len, flags); | 1077 | splice_write = out->f_op->splice_write; |
1078 | if (!splice_write) | ||
1079 | splice_write = default_file_splice_write; | ||
1080 | |||
1081 | return splice_write(pipe, out, ppos, len, flags); | ||
927 | } | 1082 | } |
928 | 1083 | ||
929 | /* | 1084 | /* |
@@ -933,11 +1088,10 @@ static long do_splice_to(struct file *in, loff_t *ppos, | |||
933 | struct pipe_inode_info *pipe, size_t len, | 1088 | struct pipe_inode_info *pipe, size_t len, |
934 | unsigned int flags) | 1089 | unsigned int flags) |
935 | { | 1090 | { |
1091 | ssize_t (*splice_read)(struct file *, loff_t *, | ||
1092 | struct pipe_inode_info *, size_t, unsigned int); | ||
936 | int ret; | 1093 | int ret; |
937 | 1094 | ||
938 | if (unlikely(!in->f_op || !in->f_op->splice_read)) | ||
939 | return -EINVAL; | ||
940 | |||
941 | if (unlikely(!(in->f_mode & FMODE_READ))) | 1095 | if (unlikely(!(in->f_mode & FMODE_READ))) |
942 | return -EBADF; | 1096 | return -EBADF; |
943 | 1097 | ||
@@ -945,7 +1099,11 @@ static long do_splice_to(struct file *in, loff_t *ppos, | |||
945 | if (unlikely(ret < 0)) | 1099 | if (unlikely(ret < 0)) |
946 | return ret; | 1100 | return ret; |
947 | 1101 | ||
948 | return in->f_op->splice_read(in, ppos, pipe, len, flags); | 1102 | splice_read = in->f_op->splice_read; |
1103 | if (!splice_read) | ||
1104 | splice_read = default_file_splice_read; | ||
1105 | |||
1106 | return splice_read(in, ppos, pipe, len, flags); | ||
949 | } | 1107 | } |
950 | 1108 | ||
951 | /** | 1109 | /** |
@@ -1112,6 +1270,9 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
1112 | return ret; | 1270 | return ret; |
1113 | } | 1271 | } |
1114 | 1272 | ||
1273 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | ||
1274 | struct pipe_inode_info *opipe, | ||
1275 | size_t len, unsigned int flags); | ||
1115 | /* | 1276 | /* |
1116 | * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same | 1277 | * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same |
1117 | * location, so checking ->i_pipe is not enough to verify that this is a | 1278 | * location, so checking ->i_pipe is not enough to verify that this is a |
@@ -1132,12 +1293,32 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1132 | struct file *out, loff_t __user *off_out, | 1293 | struct file *out, loff_t __user *off_out, |
1133 | size_t len, unsigned int flags) | 1294 | size_t len, unsigned int flags) |
1134 | { | 1295 | { |
1135 | struct pipe_inode_info *pipe; | 1296 | struct pipe_inode_info *ipipe; |
1297 | struct pipe_inode_info *opipe; | ||
1136 | loff_t offset, *off; | 1298 | loff_t offset, *off; |
1137 | long ret; | 1299 | long ret; |
1138 | 1300 | ||
1139 | pipe = pipe_info(in->f_path.dentry->d_inode); | 1301 | ipipe = pipe_info(in->f_path.dentry->d_inode); |
1140 | if (pipe) { | 1302 | opipe = pipe_info(out->f_path.dentry->d_inode); |
1303 | |||
1304 | if (ipipe && opipe) { | ||
1305 | if (off_in || off_out) | ||
1306 | return -ESPIPE; | ||
1307 | |||
1308 | if (!(in->f_mode & FMODE_READ)) | ||
1309 | return -EBADF; | ||
1310 | |||
1311 | if (!(out->f_mode & FMODE_WRITE)) | ||
1312 | return -EBADF; | ||
1313 | |||
1314 | /* Splicing to self would be fun, but... */ | ||
1315 | if (ipipe == opipe) | ||
1316 | return -EINVAL; | ||
1317 | |||
1318 | return splice_pipe_to_pipe(ipipe, opipe, len, flags); | ||
1319 | } | ||
1320 | |||
1321 | if (ipipe) { | ||
1141 | if (off_in) | 1322 | if (off_in) |
1142 | return -ESPIPE; | 1323 | return -ESPIPE; |
1143 | if (off_out) { | 1324 | if (off_out) { |
@@ -1149,7 +1330,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1149 | } else | 1330 | } else |
1150 | off = &out->f_pos; | 1331 | off = &out->f_pos; |
1151 | 1332 | ||
1152 | ret = do_splice_from(pipe, out, off, len, flags); | 1333 | ret = do_splice_from(ipipe, out, off, len, flags); |
1153 | 1334 | ||
1154 | if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) | 1335 | if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) |
1155 | ret = -EFAULT; | 1336 | ret = -EFAULT; |
@@ -1157,8 +1338,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1157 | return ret; | 1338 | return ret; |
1158 | } | 1339 | } |
1159 | 1340 | ||
1160 | pipe = pipe_info(out->f_path.dentry->d_inode); | 1341 | if (opipe) { |
1161 | if (pipe) { | ||
1162 | if (off_out) | 1342 | if (off_out) |
1163 | return -ESPIPE; | 1343 | return -ESPIPE; |
1164 | if (off_in) { | 1344 | if (off_in) { |
@@ -1170,7 +1350,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1170 | } else | 1350 | } else |
1171 | off = &in->f_pos; | 1351 | off = &in->f_pos; |
1172 | 1352 | ||
1173 | ret = do_splice_to(in, off, pipe, len, flags); | 1353 | ret = do_splice_to(in, off, opipe, len, flags); |
1174 | 1354 | ||
1175 | if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) | 1355 | if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) |
1176 | ret = -EFAULT; | 1356 | ret = -EFAULT; |
@@ -1511,7 +1691,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, | |||
1511 | * Make sure there's data to read. Wait for input if we can, otherwise | 1691 | * Make sure there's data to read. Wait for input if we can, otherwise |
1512 | * return an appropriate error. | 1692 | * return an appropriate error. |
1513 | */ | 1693 | */ |
1514 | static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | 1694 | static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) |
1515 | { | 1695 | { |
1516 | int ret; | 1696 | int ret; |
1517 | 1697 | ||
@@ -1549,7 +1729,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1549 | * Make sure there's writeable room. Wait for room if we can, otherwise | 1729 | * Make sure there's writeable room. Wait for room if we can, otherwise |
1550 | * return an appropriate error. | 1730 | * return an appropriate error. |
1551 | */ | 1731 | */ |
1552 | static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | 1732 | static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) |
1553 | { | 1733 | { |
1554 | int ret; | 1734 | int ret; |
1555 | 1735 | ||
@@ -1587,6 +1767,124 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1587 | } | 1767 | } |
1588 | 1768 | ||
1589 | /* | 1769 | /* |
1770 | * Splice contents of ipipe to opipe. | ||
1771 | */ | ||
1772 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | ||
1773 | struct pipe_inode_info *opipe, | ||
1774 | size_t len, unsigned int flags) | ||
1775 | { | ||
1776 | struct pipe_buffer *ibuf, *obuf; | ||
1777 | int ret = 0, nbuf; | ||
1778 | bool input_wakeup = false; | ||
1779 | |||
1780 | |||
1781 | retry: | ||
1782 | ret = ipipe_prep(ipipe, flags); | ||
1783 | if (ret) | ||
1784 | return ret; | ||
1785 | |||
1786 | ret = opipe_prep(opipe, flags); | ||
1787 | if (ret) | ||
1788 | return ret; | ||
1789 | |||
1790 | /* | ||
1791 | * Potential ABBA deadlock, work around it by ordering lock | ||
1792 | * grabbing by pipe info address. Otherwise two different processes | ||
1793 | * could deadlock (one doing tee from A -> B, the other from B -> A). | ||
1794 | */ | ||
1795 | pipe_double_lock(ipipe, opipe); | ||
1796 | |||
1797 | do { | ||
1798 | if (!opipe->readers) { | ||
1799 | send_sig(SIGPIPE, current, 0); | ||
1800 | if (!ret) | ||
1801 | ret = -EPIPE; | ||
1802 | break; | ||
1803 | } | ||
1804 | |||
1805 | if (!ipipe->nrbufs && !ipipe->writers) | ||
1806 | break; | ||
1807 | |||
1808 | /* | ||
1809 | * Cannot make any progress, because either the input | ||
1810 | * pipe is empty or the output pipe is full. | ||
1811 | */ | ||
1812 | if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) { | ||
1813 | /* Already processed some buffers, break */ | ||
1814 | if (ret) | ||
1815 | break; | ||
1816 | |||
1817 | if (flags & SPLICE_F_NONBLOCK) { | ||
1818 | ret = -EAGAIN; | ||
1819 | break; | ||
1820 | } | ||
1821 | |||
1822 | /* | ||
1823 | * We raced with another reader/writer and haven't | ||
1824 | * managed to process any buffers. A zero return | ||
1825 | * value means EOF, so retry instead. | ||
1826 | */ | ||
1827 | pipe_unlock(ipipe); | ||
1828 | pipe_unlock(opipe); | ||
1829 | goto retry; | ||
1830 | } | ||
1831 | |||
1832 | ibuf = ipipe->bufs + ipipe->curbuf; | ||
1833 | nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS; | ||
1834 | obuf = opipe->bufs + nbuf; | ||
1835 | |||
1836 | if (len >= ibuf->len) { | ||
1837 | /* | ||
1838 | * Simply move the whole buffer from ipipe to opipe | ||
1839 | */ | ||
1840 | *obuf = *ibuf; | ||
1841 | ibuf->ops = NULL; | ||
1842 | opipe->nrbufs++; | ||
1843 | ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS; | ||
1844 | ipipe->nrbufs--; | ||
1845 | input_wakeup = true; | ||
1846 | } else { | ||
1847 | /* | ||
1848 | * Get a reference to this pipe buffer, | ||
1849 | * so we can copy the contents over. | ||
1850 | */ | ||
1851 | ibuf->ops->get(ipipe, ibuf); | ||
1852 | *obuf = *ibuf; | ||
1853 | |||
1854 | /* | ||
1855 | * Don't inherit the gift flag, we need to | ||
1856 | * prevent multiple steals of this page. | ||
1857 | */ | ||
1858 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; | ||
1859 | |||
1860 | obuf->len = len; | ||
1861 | opipe->nrbufs++; | ||
1862 | ibuf->offset += obuf->len; | ||
1863 | ibuf->len -= obuf->len; | ||
1864 | } | ||
1865 | ret += obuf->len; | ||
1866 | len -= obuf->len; | ||
1867 | } while (len); | ||
1868 | |||
1869 | pipe_unlock(ipipe); | ||
1870 | pipe_unlock(opipe); | ||
1871 | |||
1872 | /* | ||
1873 | * If we put data in the output pipe, wakeup any potential readers. | ||
1874 | */ | ||
1875 | if (ret > 0) { | ||
1876 | smp_mb(); | ||
1877 | if (waitqueue_active(&opipe->wait)) | ||
1878 | wake_up_interruptible(&opipe->wait); | ||
1879 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
1880 | } | ||
1881 | if (input_wakeup) | ||
1882 | wakeup_pipe_writers(ipipe); | ||
1883 | |||
1884 | return ret; | ||
1885 | } | ||
1886 | |||
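The function above is the kernel half of splicing when both file descriptors are pipes. A minimal user-space sketch of the same path follows; it assumes a kernel carrying this patch, since pipe-to-pipe splice(2) is exactly what splice_pipe_to_pipe() enables (older kernels fail such a splice with EINVAL):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* move 6 bytes from pipe a to pipe b entirely in kernel space */
int main(void)
{
        int a[2], b[2];
        char buf[64];
        ssize_t n;

        if (pipe(a) || pipe(b))
                return 1;
        if (write(a[1], "hello\n", 6) != 6)
                return 1;
        n = splice(a[0], NULL, b[1], NULL, 6, SPLICE_F_NONBLOCK);
        if (n < 0) {
                perror("splice");
                return 1;
        }
        n = read(b[0], buf, sizeof(buf));
        if (n > 0)
                fwrite(buf, 1, n, stdout);
        return 0;
}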
1887 | /* | ||
1590 | * Link contents of ipipe to opipe. | 1888 | * Link contents of ipipe to opipe. |
1591 | */ | 1889 | */ |
1592 | static int link_pipe(struct pipe_inode_info *ipipe, | 1890 | static int link_pipe(struct pipe_inode_info *ipipe, |
@@ -1690,9 +1988,9 @@ static long do_tee(struct file *in, struct file *out, size_t len, | |||
1690 | * Keep going, unless we encounter an error. The ipipe/opipe | 1988 | * Keep going, unless we encounter an error. The ipipe/opipe |
1691 | * ordering doesn't really matter. | 1989 | * ordering doesn't really matter. |
1692 | */ | 1990 | */ |
1693 | ret = link_ipipe_prep(ipipe, flags); | 1991 | ret = ipipe_prep(ipipe, flags); |
1694 | if (!ret) { | 1992 | if (!ret) { |
1695 | ret = link_opipe_prep(opipe, flags); | 1993 | ret = opipe_prep(opipe, flags); |
1696 | if (!ret) | 1994 | if (!ret) |
1697 | ret = link_pipe(ipipe, opipe, len, flags); | 1995 | ret = link_pipe(ipipe, opipe, len, flags); |
1698 | } | 1996 | } |
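do_tee() above is the syscall-side wrapper over the same pipe-pair handling. A short user-space sketch of tee(2), which duplicates pipe contents without consuming them (the pipe names are illustrative):

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int in[2], out[2];
        char buf[8];

        if (pipe(in) || pipe(out))
                return 1;
        if (write(in[1], "data", 4) != 4)
                return 1;
        /* duplicate 4 bytes into the second pipe, consuming nothing */
        if (tee(in[0], out[1], 4, SPLICE_F_NONBLOCK) < 0)
                return 1;
        /* both pipes now yield the same bytes */
        return !(read(in[0], buf, 4) == 4 && read(out[0], buf, 4) == 4);
}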
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 0adc624c956f..3b52770f46ff 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -338,6 +338,8 @@ static int squashfs_remount(struct super_block *sb, int *flags, char *data) | |||
338 | 338 | ||
339 | static void squashfs_put_super(struct super_block *sb) | 339 | static void squashfs_put_super(struct super_block *sb) |
340 | { | 340 | { |
341 | lock_kernel(); | ||
342 | |||
341 | if (sb->s_fs_info) { | 343 | if (sb->s_fs_info) { |
342 | struct squashfs_sb_info *sbi = sb->s_fs_info; | 344 | struct squashfs_sb_info *sbi = sb->s_fs_info; |
343 | squashfs_cache_delete(sbi->block_cache); | 345 | squashfs_cache_delete(sbi->block_cache); |
@@ -350,6 +352,8 @@ static void squashfs_put_super(struct super_block *sb) | |||
350 | kfree(sb->s_fs_info); | 352 | kfree(sb->s_fs_info); |
351 | sb->s_fs_info = NULL; | 353 | sb->s_fs_info = NULL; |
352 | } | 354 | } |
355 | |||
356 | unlock_kernel(); | ||
353 | } | 357 | } |
354 | 358 | ||
355 | 359 | ||
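The squashfs hunk above is one instance of the pattern repeated throughout this series: the VFS no longer wraps ->put_super in the big kernel lock, so filesystems that still depend on it take it themselves. A minimal sketch for a hypothetical "toyfs" (toyfs and its s_fs_info layout are assumptions, not code from this patch):

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>

static void toyfs_put_super(struct super_block *sb)
{
        lock_kernel();          /* was taken by the VFS before this series */

        kfree(sb->s_fs_info);   /* per-sb teardown, unchanged */
        sb->s_fs_info = NULL;

        unlock_kernel();
}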
diff --git a/fs/super.c b/fs/super.c index 1943fdf655fa..83b47416d006 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/blkdev.h> | 28 | #include <linux/blkdev.h> |
29 | #include <linux/quotaops.h> | 29 | #include <linux/quotaops.h> |
30 | #include <linux/namei.h> | 30 | #include <linux/namei.h> |
31 | #include <linux/buffer_head.h> /* for fsync_super() */ | ||
32 | #include <linux/mount.h> | 31 | #include <linux/mount.h> |
33 | #include <linux/security.h> | 32 | #include <linux/security.h> |
34 | #include <linux/syscalls.h> | 33 | #include <linux/syscalls.h> |
@@ -38,7 +37,6 @@ | |||
38 | #include <linux/kobject.h> | 37 | #include <linux/kobject.h> |
39 | #include <linux/mutex.h> | 38 | #include <linux/mutex.h> |
40 | #include <linux/file.h> | 39 | #include <linux/file.h> |
41 | #include <linux/async.h> | ||
42 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
43 | #include "internal.h" | 41 | #include "internal.h" |
44 | 42 | ||
@@ -72,7 +70,6 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
72 | INIT_HLIST_HEAD(&s->s_anon); | 70 | INIT_HLIST_HEAD(&s->s_anon); |
73 | INIT_LIST_HEAD(&s->s_inodes); | 71 | INIT_LIST_HEAD(&s->s_inodes); |
74 | INIT_LIST_HEAD(&s->s_dentry_lru); | 72 | INIT_LIST_HEAD(&s->s_dentry_lru); |
75 | INIT_LIST_HEAD(&s->s_async_list); | ||
76 | init_rwsem(&s->s_umount); | 73 | init_rwsem(&s->s_umount); |
77 | mutex_init(&s->s_lock); | 74 | mutex_init(&s->s_lock); |
78 | lockdep_set_class(&s->s_umount, &type->s_umount_key); | 75 | lockdep_set_class(&s->s_umount, &type->s_umount_key); |
@@ -285,38 +282,6 @@ void unlock_super(struct super_block * sb) | |||
285 | EXPORT_SYMBOL(lock_super); | 282 | EXPORT_SYMBOL(lock_super); |
286 | EXPORT_SYMBOL(unlock_super); | 283 | EXPORT_SYMBOL(unlock_super); |
287 | 284 | ||
288 | /* | ||
289 | * Write out and wait upon all dirty data associated with this | ||
290 | * superblock. Filesystem data as well as the underlying block | ||
291 | * device. Takes the superblock lock. Requires a second blkdev | ||
292 | * flush by the caller to complete the operation. | ||
293 | */ | ||
294 | void __fsync_super(struct super_block *sb) | ||
295 | { | ||
296 | sync_inodes_sb(sb, 0); | ||
297 | vfs_dq_sync(sb); | ||
298 | lock_super(sb); | ||
299 | if (sb->s_dirt && sb->s_op->write_super) | ||
300 | sb->s_op->write_super(sb); | ||
301 | unlock_super(sb); | ||
302 | if (sb->s_op->sync_fs) | ||
303 | sb->s_op->sync_fs(sb, 1); | ||
304 | sync_blockdev(sb->s_bdev); | ||
305 | sync_inodes_sb(sb, 1); | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * Write out and wait upon all dirty data associated with this | ||
310 | * superblock. Filesystem data as well as the underlying block | ||
311 | * device. Takes the superblock lock. | ||
312 | */ | ||
313 | int fsync_super(struct super_block *sb) | ||
314 | { | ||
315 | __fsync_super(sb); | ||
316 | return sync_blockdev(sb->s_bdev); | ||
317 | } | ||
318 | EXPORT_SYMBOL_GPL(fsync_super); | ||
319 | |||
320 | /** | 285 | /** |
321 | * generic_shutdown_super - common helper for ->kill_sb() | 286 | * generic_shutdown_super - common helper for ->kill_sb() |
322 | * @sb: superblock to kill | 287 | * @sb: superblock to kill |
@@ -338,21 +303,13 @@ void generic_shutdown_super(struct super_block *sb) | |||
338 | 303 | ||
339 | if (sb->s_root) { | 304 | if (sb->s_root) { |
340 | shrink_dcache_for_umount(sb); | 305 | shrink_dcache_for_umount(sb); |
341 | fsync_super(sb); | 306 | sync_filesystem(sb); |
342 | lock_super(sb); | 307 | get_fs_excl(); |
343 | sb->s_flags &= ~MS_ACTIVE; | 308 | sb->s_flags &= ~MS_ACTIVE; |
344 | 309 | ||
345 | /* | ||
346 | * wait for asynchronous fs operations to finish before going further | ||
347 | */ | ||
348 | async_synchronize_full_domain(&sb->s_async_list); | ||
349 | |||
350 | /* bad name - it should be evict_inodes() */ | 310 | /* bad name - it should be evict_inodes() */ |
351 | invalidate_inodes(sb); | 311 | invalidate_inodes(sb); |
352 | lock_kernel(); | ||
353 | 312 | ||
354 | if (sop->write_super && sb->s_dirt) | ||
355 | sop->write_super(sb); | ||
356 | if (sop->put_super) | 313 | if (sop->put_super) |
357 | sop->put_super(sb); | 314 | sop->put_super(sb); |
358 | 315 | ||
@@ -362,9 +319,7 @@ void generic_shutdown_super(struct super_block *sb) | |||
362 | "Self-destruct in 5 seconds. Have a nice day...\n", | 319 | "Self-destruct in 5 seconds. Have a nice day...\n", |
363 | sb->s_id); | 320 | sb->s_id); |
364 | } | 321 | } |
365 | 322 | put_fs_excl(); | |
366 | unlock_kernel(); | ||
367 | unlock_super(sb); | ||
368 | } | 323 | } |
369 | spin_lock(&sb_lock); | 324 | spin_lock(&sb_lock); |
370 | /* should be initialized for __put_super_and_need_restart() */ | 325 | /* should be initialized for __put_super_and_need_restart() */ |
@@ -441,16 +396,14 @@ void drop_super(struct super_block *sb) | |||
441 | 396 | ||
442 | EXPORT_SYMBOL(drop_super); | 397 | EXPORT_SYMBOL(drop_super); |
443 | 398 | ||
444 | static inline void write_super(struct super_block *sb) | 399 | /** |
445 | { | 400 | * sync_supers - helper for periodic superblock writeback |
446 | lock_super(sb); | 401 | * |
447 | if (sb->s_root && sb->s_dirt) | 402 | * Call the write_super method if present on all dirty superblocks in |
448 | if (sb->s_op->write_super) | 403 | * the system. This is for the periodic writeback used by most older |
449 | sb->s_op->write_super(sb); | 404 | * filesystems. For data integrity superblock writeback use |
450 | unlock_super(sb); | 405 | * sync_filesystem() instead. |
451 | } | 406 | * |
452 | |||
453 | /* | ||
454 | * Note: check the dirty flag before waiting, so we don't | 407 | * Note: check the dirty flag before waiting, so we don't |
455 | * hold up the sync while mounting a device. (The newly | 408 | * hold up the sync while mounting a device. (The newly |
456 | * mounted device won't need syncing.) | 409 | * mounted device won't need syncing.) |
@@ -462,12 +415,15 @@ void sync_supers(void) | |||
462 | spin_lock(&sb_lock); | 415 | spin_lock(&sb_lock); |
463 | restart: | 416 | restart: |
464 | list_for_each_entry(sb, &super_blocks, s_list) { | 417 | list_for_each_entry(sb, &super_blocks, s_list) { |
465 | if (sb->s_dirt) { | 418 | if (sb->s_op->write_super && sb->s_dirt) { |
466 | sb->s_count++; | 419 | sb->s_count++; |
467 | spin_unlock(&sb_lock); | 420 | spin_unlock(&sb_lock); |
421 | |||
468 | down_read(&sb->s_umount); | 422 | down_read(&sb->s_umount); |
469 | write_super(sb); | 423 | if (sb->s_root && sb->s_dirt) |
424 | sb->s_op->write_super(sb); | ||
470 | up_read(&sb->s_umount); | 425 | up_read(&sb->s_umount); |
426 | |||
471 | spin_lock(&sb_lock); | 427 | spin_lock(&sb_lock); |
472 | if (__put_super_and_need_restart(sb)) | 428 | if (__put_super_and_need_restart(sb)) |
473 | goto restart; | 429 | goto restart; |
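For context on what feeds sync_supers(): a filesystem marks its superblock dirty after touching on-disk metadata, and the periodic writeback path above then invokes ->write_super for it. A sketch, assuming a hypothetical "toyfs" with an invented toyfs_sb_info:

#include <linux/fs.h>

struct toyfs_sb_info {          /* invented for illustration */
        unsigned int s_generation;
};

static void toyfs_bump_generation(struct super_block *sb)
{
        struct toyfs_sb_info *sbi = sb->s_fs_info;

        sbi->s_generation++;
        sb->s_dirt = 1;         /* sync_supers() will notice this and
                                 * call ->write_super for us */
}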
@@ -476,60 +432,6 @@ restart: | |||
476 | spin_unlock(&sb_lock); | 432 | spin_unlock(&sb_lock); |
477 | } | 433 | } |
478 | 434 | ||
479 | /* | ||
480 | * Call the ->sync_fs super_op against all filesystems which are r/w and | ||
481 | * which implement it. | ||
482 | * | ||
483 | * This operation is careful to avoid the livelock which could easily happen | ||
484 | * if two or more filesystems are being continuously dirtied. s_need_sync_fs | ||
485 | * is used only here. We set it against all filesystems and then clear it as | ||
486 | * we sync them. So redirtied filesystems are skipped. | ||
487 | * | ||
488 | * But if process A is currently running sync_filesystems and then process B | ||
489 | * calls sync_filesystems as well, process B will set all the s_need_sync_fs | ||
490 | * flags again, which will cause process A to resync everything. Fix that with | ||
491 | * a local mutex. | ||
492 | * | ||
493 | * (Fabian) Avoid sync_fs with clean fs & wait mode 0 | ||
494 | */ | ||
495 | void sync_filesystems(int wait) | ||
496 | { | ||
497 | struct super_block *sb; | ||
498 | static DEFINE_MUTEX(mutex); | ||
499 | |||
500 | mutex_lock(&mutex); /* Could be down_interruptible */ | ||
501 | spin_lock(&sb_lock); | ||
502 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
503 | if (!sb->s_op->sync_fs) | ||
504 | continue; | ||
505 | if (sb->s_flags & MS_RDONLY) | ||
506 | continue; | ||
507 | sb->s_need_sync_fs = 1; | ||
508 | } | ||
509 | |||
510 | restart: | ||
511 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
512 | if (!sb->s_need_sync_fs) | ||
513 | continue; | ||
514 | sb->s_need_sync_fs = 0; | ||
515 | if (sb->s_flags & MS_RDONLY) | ||
516 | continue; /* hm. Was remounted r/o meanwhile */ | ||
517 | sb->s_count++; | ||
518 | spin_unlock(&sb_lock); | ||
519 | down_read(&sb->s_umount); | ||
520 | async_synchronize_full_domain(&sb->s_async_list); | ||
521 | if (sb->s_root && (wait || sb->s_dirt)) | ||
522 | sb->s_op->sync_fs(sb, wait); | ||
523 | up_read(&sb->s_umount); | ||
524 | /* restart only when sb is no longer on the list */ | ||
525 | spin_lock(&sb_lock); | ||
526 | if (__put_super_and_need_restart(sb)) | ||
527 | goto restart; | ||
528 | } | ||
529 | spin_unlock(&sb_lock); | ||
530 | mutex_unlock(&mutex); | ||
531 | } | ||
532 | |||
533 | /** | 435 | /** |
534 | * get_super - get the superblock of a device | 436 | * get_super - get the superblock of a device |
535 | * @bdev: device to get the superblock for | 437 | * @bdev: device to get the superblock for |
@@ -616,45 +518,6 @@ out: | |||
616 | } | 518 | } |
617 | 519 | ||
618 | /** | 520 | /** |
619 | * mark_files_ro - mark all files read-only | ||
620 | * @sb: superblock in question | ||
621 | * | ||
622 | * All files are marked read-only. We don't care about pending | ||
623 | * delete files so this should be used in 'force' mode only. | ||
624 | */ | ||
625 | |||
626 | static void mark_files_ro(struct super_block *sb) | ||
627 | { | ||
628 | struct file *f; | ||
629 | |||
630 | retry: | ||
631 | file_list_lock(); | ||
632 | list_for_each_entry(f, &sb->s_files, f_u.fu_list) { | ||
633 | struct vfsmount *mnt; | ||
634 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) | ||
635 | continue; | ||
636 | if (!file_count(f)) | ||
637 | continue; | ||
638 | if (!(f->f_mode & FMODE_WRITE)) | ||
639 | continue; | ||
640 | f->f_mode &= ~FMODE_WRITE; | ||
641 | if (file_check_writeable(f) != 0) | ||
642 | continue; | ||
643 | file_release_write(f); | ||
644 | mnt = mntget(f->f_path.mnt); | ||
645 | file_list_unlock(); | ||
646 | /* | ||
647 | * This can sleep, so we can't hold | ||
648 | * the file_list_lock() spinlock. | ||
649 | */ | ||
650 | mnt_drop_write(mnt); | ||
651 | mntput(mnt); | ||
652 | goto retry; | ||
653 | } | ||
654 | file_list_unlock(); | ||
655 | } | ||
656 | |||
657 | /** | ||
658 | * do_remount_sb - asks filesystem to change mount options. | 521 | * do_remount_sb - asks filesystem to change mount options. |
659 | * @sb: superblock in question | 522 | * @sb: superblock in question |
660 | * @flags: numeric part of options | 523 | * @flags: numeric part of options |
@@ -675,27 +538,31 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
675 | if (flags & MS_RDONLY) | 538 | if (flags & MS_RDONLY) |
676 | acct_auto_close(sb); | 539 | acct_auto_close(sb); |
677 | shrink_dcache_sb(sb); | 540 | shrink_dcache_sb(sb); |
678 | fsync_super(sb); | 541 | sync_filesystem(sb); |
679 | 542 | ||
680 | /* If we are remounting RDONLY and current sb is read/write, | 543 | /* If we are remounting RDONLY and current sb is read/write, |
681 | make sure there are no rw files opened */ | 544 | make sure there are no rw files opened */ |
682 | if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { | 545 | if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { |
683 | if (force) | 546 | if (force) |
684 | mark_files_ro(sb); | 547 | mark_files_ro(sb); |
685 | else if (!fs_may_remount_ro(sb)) | 548 | else if (!fs_may_remount_ro(sb)) { |
549 | unlock_kernel(); | ||
686 | return -EBUSY; | 550 | return -EBUSY; |
551 | } | ||
687 | retval = vfs_dq_off(sb, 1); | 552 | retval = vfs_dq_off(sb, 1); |
688 | if (retval < 0 && retval != -ENOSYS) | 553 | if (retval < 0 && retval != -ENOSYS) { |
554 | unlock_kernel(); | ||
689 | return -EBUSY; | 555 | return -EBUSY; |
556 | } | ||
690 | } | 557 | } |
691 | remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); | 558 | remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); |
692 | 559 | ||
693 | if (sb->s_op->remount_fs) { | 560 | if (sb->s_op->remount_fs) { |
694 | lock_super(sb); | ||
695 | retval = sb->s_op->remount_fs(sb, &flags, data); | 561 | retval = sb->s_op->remount_fs(sb, &flags, data); |
696 | unlock_super(sb); | 562 | if (retval) { |
697 | if (retval) | 563 | unlock_kernel(); |
698 | return retval; | 564 | return retval; |
565 | } | ||
699 | } | 566 | } |
700 | sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); | 567 | sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); |
701 | if (remount_rw) | 568 | if (remount_rw) |
@@ -711,18 +578,17 @@ static void do_emergency_remount(struct work_struct *work) | |||
711 | list_for_each_entry(sb, &super_blocks, s_list) { | 578 | list_for_each_entry(sb, &super_blocks, s_list) { |
712 | sb->s_count++; | 579 | sb->s_count++; |
713 | spin_unlock(&sb_lock); | 580 | spin_unlock(&sb_lock); |
714 | down_read(&sb->s_umount); | 581 | down_write(&sb->s_umount); |
715 | if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { | 582 | if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { |
716 | /* | 583 | /* |
717 | * ->remount_fs needs lock_kernel(). | 584 | * ->remount_fs needs lock_kernel(). |
718 | * | 585 | * |
719 | * What lock protects sb->s_flags?? | 586 | * What lock protects sb->s_flags?? |
720 | */ | 587 | */ |
721 | lock_kernel(); | ||
722 | do_remount_sb(sb, MS_RDONLY, NULL, 1); | 588 | do_remount_sb(sb, MS_RDONLY, NULL, 1); |
723 | unlock_kernel(); | ||
724 | } | 589 | } |
725 | drop_super(sb); | 590 | up_write(&sb->s_umount); |
591 | put_super(sb); | ||
726 | spin_lock(&sb_lock); | 592 | spin_lock(&sb_lock); |
727 | } | 593 | } |
728 | spin_unlock(&sb_lock); | 594 | spin_unlock(&sb_lock); |
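One user-space route into do_emergency_remount() is the magic sysrq interface: writing 'u' to the trigger file asks the kernel to remount everything read-only. A small sketch, assuming CONFIG_MAGIC_SYSRQ is enabled and the caller has the necessary privileges:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sysrq-trigger", "w");

        if (!f) {
                perror("/proc/sysrq-trigger");
                return 1;
        }
        fputc('u', f);          /* 'u': emergency remount read-only */
        return fclose(f) ? 1 : 0;
}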
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
@@ -13,38 +13,123 @@ | |||
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/quotaops.h> | 14 | #include <linux/quotaops.h> |
15 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
16 | #include "internal.h" | ||
16 | 17 | ||
17 | #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ | 18 | #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ |
18 | SYNC_FILE_RANGE_WAIT_AFTER) | 19 | SYNC_FILE_RANGE_WAIT_AFTER) |
19 | 20 | ||
20 | /* | 21 | /* |
21 | * sync everything. Start out by waking pdflush, because that writes back | 22 | * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) |
22 | * all queues in parallel. | 23 | * just dirties buffers with inodes so we have to submit IO for these buffers |
24 | * via __sync_blockdev(). This also speeds up the wait == 1 case since in that | ||
25 | * case write_inode() functions do sync_dirty_buffer() and thus effectively | ||
26 | * write one block at a time. | ||
23 | */ | 27 | */ |
24 | static void do_sync(unsigned long wait) | 28 | static int __sync_filesystem(struct super_block *sb, int wait) |
25 | { | 29 | { |
26 | wakeup_pdflush(0); | 30 | /* Avoid syncing and cache pruning twice for a quota sync */ |
27 | sync_inodes(0); /* All mappings, inodes and their blockdevs */ | ||
28 | vfs_dq_sync(NULL); | ||
29 | sync_supers(); /* Write the superblocks */ | ||
30 | sync_filesystems(0); /* Start syncing the filesystems */ | ||
31 | sync_filesystems(wait); /* Waitingly sync the filesystems */ | ||
32 | sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ | ||
33 | if (!wait) | 31 | if (!wait) |
34 | printk("Emergency Sync complete\n"); | 32 | writeout_quota_sb(sb, -1); |
35 | if (unlikely(laptop_mode)) | 33 | else |
36 | laptop_sync_completion(); | 34 | sync_quota_sb(sb, -1); |
35 | sync_inodes_sb(sb, wait); | ||
36 | if (sb->s_op->sync_fs) | ||
37 | sb->s_op->sync_fs(sb, wait); | ||
38 | return __sync_blockdev(sb->s_bdev, wait); | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * Write out and wait upon all dirty data associated with this | ||
43 | * superblock. Filesystem data as well as the underlying block | ||
44 | * device. Takes the superblock lock. | ||
45 | */ | ||
46 | int sync_filesystem(struct super_block *sb) | ||
47 | { | ||
48 | int ret; | ||
49 | |||
50 | /* | ||
51 | * We need to be protected against the filesystem going from | ||
52 | * r/o to r/w or vice versa. | ||
53 | */ | ||
54 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
55 | |||
56 | /* | ||
57 | * No point in syncing out anything if the filesystem is read-only. | ||
58 | */ | ||
59 | if (sb->s_flags & MS_RDONLY) | ||
60 | return 0; | ||
61 | |||
62 | ret = __sync_filesystem(sb, 0); | ||
63 | if (ret < 0) | ||
64 | return ret; | ||
65 | return __sync_filesystem(sb, 1); | ||
66 | } | ||
67 | EXPORT_SYMBOL_GPL(sync_filesystem); | ||
68 | |||
69 | /* | ||
70 | * Sync all the data for all the filesystems (called by sys_sync() and | ||
71 | * emergency sync) | ||
72 | * | ||
73 | * This operation is careful to avoid the livelock which could easily happen | ||
74 | * if two or more filesystems are being continuously dirtied. s_need_sync | ||
75 | * is used only here. We set it against all filesystems and then clear it as | ||
76 | * we sync them. So redirtied filesystems are skipped. | ||
77 | * | ||
78 | * But if process A is currently running sync_filesystems and then process B | ||
79 | * calls sync_filesystems as well, process B will set all the s_need_sync | ||
80 | * flags again, which will cause process A to resync everything. Fix that with | ||
81 | * a local mutex. | ||
82 | */ | ||
83 | static void sync_filesystems(int wait) | ||
84 | { | ||
85 | struct super_block *sb; | ||
86 | static DEFINE_MUTEX(mutex); | ||
87 | |||
88 | mutex_lock(&mutex); /* Could be down_interruptible */ | ||
89 | spin_lock(&sb_lock); | ||
90 | list_for_each_entry(sb, &super_blocks, s_list) | ||
91 | sb->s_need_sync = 1; | ||
92 | |||
93 | restart: | ||
94 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
95 | if (!sb->s_need_sync) | ||
96 | continue; | ||
97 | sb->s_need_sync = 0; | ||
98 | sb->s_count++; | ||
99 | spin_unlock(&sb_lock); | ||
100 | |||
101 | down_read(&sb->s_umount); | ||
102 | if (!(sb->s_flags & MS_RDONLY) && sb->s_root) | ||
103 | __sync_filesystem(sb, wait); | ||
104 | up_read(&sb->s_umount); | ||
105 | |||
106 | /* restart only when sb is no longer on the list */ | ||
107 | spin_lock(&sb_lock); | ||
108 | if (__put_super_and_need_restart(sb)) | ||
109 | goto restart; | ||
110 | } | ||
111 | spin_unlock(&sb_lock); | ||
112 | mutex_unlock(&mutex); | ||
37 | } | 113 | } |
38 | 114 | ||
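The s_need_sync logic above is a mark-and-sweep pass: flag all entries first, then clear each flag as it is processed, so an entry redirtied mid-walk cannot livelock the loop. The same idea reduced to a self-contained toy sketch (all names invented):

struct toy_item {
        struct toy_item *next;
        int need_sync;
};

static void toy_sync_all(struct toy_item *list)
{
        struct toy_item *it;

        for (it = list; it; it = it->next)      /* mark phase */
                it->need_sync = 1;

        for (it = list; it; it = it->next) {    /* sweep phase */
                if (!it->need_sync)
                        continue;
                it->need_sync = 0;
                /*
                 * Write the item out here. Even if it is dirtied
                 * again while we work, this pass never revisits it,
                 * so continuous dirtying cannot livelock the loop.
                 */
        }
}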
39 | SYSCALL_DEFINE0(sync) | 115 | SYSCALL_DEFINE0(sync) |
40 | { | 116 | { |
41 | do_sync(1); | 117 | sync_filesystems(0); |
118 | sync_filesystems(1); | ||
119 | if (unlikely(laptop_mode)) | ||
120 | laptop_sync_completion(); | ||
42 | return 0; | 121 | return 0; |
43 | } | 122 | } |
44 | 123 | ||
45 | static void do_sync_work(struct work_struct *work) | 124 | static void do_sync_work(struct work_struct *work) |
46 | { | 125 | { |
47 | do_sync(0); | 126 | /* |
127 | * Sync twice to reduce the possibility we skipped some inodes / pages | ||
128 | * because they were temporarily locked | ||
129 | */ | ||
130 | sync_filesystems(0); | ||
131 | sync_filesystems(0); | ||
132 | printk("Emergency Sync complete\n"); | ||
48 | kfree(work); | 133 | kfree(work); |
49 | } | 134 | } |
50 | 135 | ||
@@ -75,10 +160,8 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) | |||
75 | 160 | ||
76 | /* sync the superblock to buffers */ | 161 | /* sync the superblock to buffers */ |
77 | sb = inode->i_sb; | 162 | sb = inode->i_sb; |
78 | lock_super(sb); | ||
79 | if (sb->s_dirt && sb->s_op->write_super) | 163 | if (sb->s_dirt && sb->s_op->write_super) |
80 | sb->s_op->write_super(sb); | 164 | sb->s_op->write_super(sb); |
81 | unlock_super(sb); | ||
82 | 165 | ||
83 | /* .. finally sync the buffers to disk */ | 166 | /* .. finally sync the buffers to disk */ |
84 | err = sync_blockdev(sb->s_bdev); | 167 | err = sync_blockdev(sb->s_bdev); |
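file_fsync() and its relatives are driven from fsync(2) and fdatasync(2); the datasync argument seen throughout these hunks distinguishes the two. A user-space sketch of the calling side:

#include <fcntl.h>
#include <unistd.h>

/* write a buffer and make it durable; data_only selects fdatasync(2),
 * which lets the kernel skip a pure-metadata inode write */
int write_durably(int fd, const void *buf, size_t len, int data_only)
{
        if (write(fd, buf, len) != (ssize_t)len)
                return -1;
        return data_only ? fdatasync(fd) : fsync(fd);
}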
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 56f655254bfe..c7798079e644 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c | |||
@@ -24,7 +24,7 @@ static int sysv_readdir(struct file *, void *, filldir_t); | |||
24 | const struct file_operations sysv_dir_operations = { | 24 | const struct file_operations sysv_dir_operations = { |
25 | .read = generic_read_dir, | 25 | .read = generic_read_dir, |
26 | .readdir = sysv_readdir, | 26 | .readdir = sysv_readdir, |
27 | .fsync = sysv_sync_file, | 27 | .fsync = simple_fsync, |
28 | }; | 28 | }; |
29 | 29 | ||
30 | static inline void dir_put_page(struct page *page) | 30 | static inline void dir_put_page(struct page *page) |
diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 589be21d884e..96340c01f4a7 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c | |||
@@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = { | |||
26 | .write = do_sync_write, | 26 | .write = do_sync_write, |
27 | .aio_write = generic_file_aio_write, | 27 | .aio_write = generic_file_aio_write, |
28 | .mmap = generic_file_mmap, | 28 | .mmap = generic_file_mmap, |
29 | .fsync = sysv_sync_file, | 29 | .fsync = simple_fsync, |
30 | .splice_read = generic_file_splice_read, | 30 | .splice_read = generic_file_splice_read, |
31 | }; | 31 | }; |
32 | 32 | ||
@@ -34,18 +34,3 @@ const struct inode_operations sysv_file_inode_operations = { | |||
34 | .truncate = sysv_truncate, | 34 | .truncate = sysv_truncate, |
35 | .getattr = sysv_getattr, | 35 | .getattr = sysv_getattr, |
36 | }; | 36 | }; |
37 | |||
38 | int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync) | ||
39 | { | ||
40 | struct inode *inode = dentry->d_inode; | ||
41 | int err; | ||
42 | |||
43 | err = sync_mapping_buffers(inode->i_mapping); | ||
44 | if (!(inode->i_state & I_DIRTY)) | ||
45 | return err; | ||
46 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
47 | return err; | ||
48 | |||
49 | err |= sysv_sync_inode(inode); | ||
50 | return err ? -EIO : 0; | ||
51 | } | ||
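The deleted sysv_sync_file() (like udf_fsync_file() and ufs_sync_file() further down) is replaced by the generic simple_fsync(). For reference, a sketch of roughly what fs/libfs.c provides there, reconstructed from the pattern being removed rather than quoted verbatim:

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>

int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
{
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
                .nr_to_write = 0,       /* metadata only; data pages were
                                         * written by the caller's path */
        };
        struct inode *inode = dentry->d_inode;
        int err;
        int ret;

        ret = sync_mapping_buffers(inode->i_mapping);
        if (!(inode->i_state & I_DIRTY))
                return ret;
        if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
                return ret;

        err = sync_inode(inode, &wbc);
        if (ret == 0)
                ret = err;
        return ret;
}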
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index da20b48d350f..479923456a54 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -31,15 +31,13 @@ | |||
31 | #include <asm/byteorder.h> | 31 | #include <asm/byteorder.h> |
32 | #include "sysv.h" | 32 | #include "sysv.h" |
33 | 33 | ||
34 | /* This is only called on sync() and umount(), when s_dirt=1. */ | 34 | static int sysv_sync_fs(struct super_block *sb, int wait) |
35 | static void sysv_write_super(struct super_block *sb) | ||
36 | { | 35 | { |
37 | struct sysv_sb_info *sbi = SYSV_SB(sb); | 36 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
38 | unsigned long time = get_seconds(), old_time; | 37 | unsigned long time = get_seconds(), old_time; |
39 | 38 | ||
39 | lock_super(sb); | ||
40 | lock_kernel(); | 40 | lock_kernel(); |
41 | if (sb->s_flags & MS_RDONLY) | ||
42 | goto clean; | ||
43 | 41 | ||
44 | /* | 42 | /* |
45 | * If we are going to write out the super block, | 43 | * If we are going to write out the super block, |
@@ -53,18 +51,30 @@ static void sysv_write_super(struct super_block *sb) | |||
53 | *sbi->s_sb_time = cpu_to_fs32(sbi, time); | 51 | *sbi->s_sb_time = cpu_to_fs32(sbi, time); |
54 | mark_buffer_dirty(sbi->s_bh2); | 52 | mark_buffer_dirty(sbi->s_bh2); |
55 | } | 53 | } |
56 | clean: | 54 | |
57 | sb->s_dirt = 0; | ||
58 | unlock_kernel(); | 55 | unlock_kernel(); |
56 | unlock_super(sb); | ||
57 | |||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static void sysv_write_super(struct super_block *sb) | ||
62 | { | ||
63 | if (!(sb->s_flags & MS_RDONLY)) | ||
64 | sysv_sync_fs(sb, 1); | ||
65 | else | ||
66 | sb->s_dirt = 0; | ||
59 | } | 67 | } |
60 | 68 | ||
61 | static int sysv_remount(struct super_block *sb, int *flags, char *data) | 69 | static int sysv_remount(struct super_block *sb, int *flags, char *data) |
62 | { | 70 | { |
63 | struct sysv_sb_info *sbi = SYSV_SB(sb); | 71 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
72 | lock_super(sb); | ||
64 | if (sbi->s_forced_ro) | 73 | if (sbi->s_forced_ro) |
65 | *flags |= MS_RDONLY; | 74 | *flags |= MS_RDONLY; |
66 | if (!(*flags & MS_RDONLY)) | 75 | if (!(*flags & MS_RDONLY)) |
67 | sb->s_dirt = 1; | 76 | sb->s_dirt = 1; |
77 | unlock_super(sb); | ||
68 | return 0; | 78 | return 0; |
69 | } | 79 | } |
70 | 80 | ||
@@ -72,6 +82,11 @@ static void sysv_put_super(struct super_block *sb) | |||
72 | { | 82 | { |
73 | struct sysv_sb_info *sbi = SYSV_SB(sb); | 83 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
74 | 84 | ||
85 | lock_kernel(); | ||
86 | |||
87 | if (sb->s_dirt) | ||
88 | sysv_write_super(sb); | ||
89 | |||
75 | if (!(sb->s_flags & MS_RDONLY)) { | 90 | if (!(sb->s_flags & MS_RDONLY)) { |
76 | /* XXX ext2 also updates the state here */ | 91 | /* XXX ext2 also updates the state here */ |
77 | mark_buffer_dirty(sbi->s_bh1); | 92 | mark_buffer_dirty(sbi->s_bh1); |
@@ -84,6 +99,8 @@ static void sysv_put_super(struct super_block *sb) | |||
84 | brelse(sbi->s_bh2); | 99 | brelse(sbi->s_bh2); |
85 | 100 | ||
86 | kfree(sbi); | 101 | kfree(sbi); |
102 | |||
103 | unlock_kernel(); | ||
87 | } | 104 | } |
88 | 105 | ||
89 | static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) | 106 | static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) |
@@ -236,7 +253,7 @@ bad_inode: | |||
236 | return ERR_PTR(-EIO); | 253 | return ERR_PTR(-EIO); |
237 | } | 254 | } |
238 | 255 | ||
239 | static struct buffer_head * sysv_update_inode(struct inode * inode) | 256 | int sysv_write_inode(struct inode *inode, int wait) |
240 | { | 257 | { |
241 | struct super_block * sb = inode->i_sb; | 258 | struct super_block * sb = inode->i_sb; |
242 | struct sysv_sb_info * sbi = SYSV_SB(sb); | 259 | struct sysv_sb_info * sbi = SYSV_SB(sb); |
@@ -244,19 +261,21 @@ static struct buffer_head * sysv_update_inode(struct inode * inode) | |||
244 | struct sysv_inode * raw_inode; | 261 | struct sysv_inode * raw_inode; |
245 | struct sysv_inode_info * si; | 262 | struct sysv_inode_info * si; |
246 | unsigned int ino, block; | 263 | unsigned int ino, block; |
264 | int err = 0; | ||
247 | 265 | ||
248 | ino = inode->i_ino; | 266 | ino = inode->i_ino; |
249 | if (!ino || ino > sbi->s_ninodes) { | 267 | if (!ino || ino > sbi->s_ninodes) { |
250 | printk("Bad inode number on dev %s: %d is out of range\n", | 268 | printk("Bad inode number on dev %s: %d is out of range\n", |
251 | inode->i_sb->s_id, ino); | 269 | inode->i_sb->s_id, ino); |
252 | return NULL; | 270 | return -EIO; |
253 | } | 271 | } |
254 | raw_inode = sysv_raw_inode(sb, ino, &bh); | 272 | raw_inode = sysv_raw_inode(sb, ino, &bh); |
255 | if (!raw_inode) { | 273 | if (!raw_inode) { |
256 | printk("unable to read i-node block\n"); | 274 | printk("unable to read i-node block\n"); |
257 | return NULL; | 275 | return -EIO; |
258 | } | 276 | } |
259 | 277 | ||
278 | lock_kernel(); | ||
260 | raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); | 279 | raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); |
261 | raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); | 280 | raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); |
262 | raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); | 281 | raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); |
@@ -272,38 +291,23 @@ static struct buffer_head * sysv_update_inode(struct inode * inode) | |||
272 | for (block = 0; block < 10+1+1+1; block++) | 291 | for (block = 0; block < 10+1+1+1; block++) |
273 | write3byte(sbi, (u8 *)&si->i_data[block], | 292 | write3byte(sbi, (u8 *)&si->i_data[block], |
274 | &raw_inode->i_data[3*block]); | 293 | &raw_inode->i_data[3*block]); |
294 | unlock_kernel(); | ||
275 | mark_buffer_dirty(bh); | 295 | mark_buffer_dirty(bh); |
276 | return bh; | 296 | if (wait) { |
277 | } | 297 | sync_dirty_buffer(bh); |
278 | 298 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | |
279 | int sysv_write_inode(struct inode * inode, int wait) | 299 | printk ("IO error syncing sysv inode [%s:%08x]\n", |
280 | { | 300 | sb->s_id, ino); |
281 | struct buffer_head *bh; | 301 | err = -EIO; |
282 | lock_kernel(); | 302 | } |
283 | bh = sysv_update_inode(inode); | 303 | } |
284 | brelse(bh); | 304 | brelse(bh); |
285 | unlock_kernel(); | ||
286 | return 0; | 305 | return err; |
287 | } | 306 | } |
288 | 307 | ||
289 | int sysv_sync_inode(struct inode * inode) | 308 | int sysv_sync_inode(struct inode *inode) |
290 | { | 309 | { |
291 | int err = 0; | 310 | return sysv_write_inode(inode, 1); |
292 | struct buffer_head *bh; | ||
293 | |||
294 | bh = sysv_update_inode(inode); | ||
295 | if (bh && buffer_dirty(bh)) { | ||
296 | sync_dirty_buffer(bh); | ||
297 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | ||
298 | printk ("IO error syncing sysv inode [%s:%08lx]\n", | ||
299 | inode->i_sb->s_id, inode->i_ino); | ||
300 | err = -1; | ||
301 | } | ||
302 | } | ||
303 | else if (!bh) | ||
304 | err = -1; | ||
305 | brelse (bh); | ||
306 | return err; | ||
307 | } | 311 | } |
308 | 312 | ||
309 | static void sysv_delete_inode(struct inode *inode) | 313 | static void sysv_delete_inode(struct inode *inode) |
@@ -347,6 +351,7 @@ const struct super_operations sysv_sops = { | |||
347 | .delete_inode = sysv_delete_inode, | 351 | .delete_inode = sysv_delete_inode, |
348 | .put_super = sysv_put_super, | 352 | .put_super = sysv_put_super, |
349 | .write_super = sysv_write_super, | 353 | .write_super = sysv_write_super, |
354 | .sync_fs = sysv_sync_fs, | ||
350 | .remount_fs = sysv_remount, | 355 | .remount_fs = sysv_remount, |
351 | .statfs = sysv_statfs, | 356 | .statfs = sysv_statfs, |
352 | }; | 357 | }; |
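The sysv conversion above shows the split this series applies again and again: the real flushing work moves into ->sync_fs, and ->write_super shrinks to a wrapper for the periodic writeback path. The shape, sketched for a hypothetical "toyfs":

static int toyfs_sync_fs(struct super_block *sb, int wait)
{
        /* flush the on-disk superblock fields to the buffer cache,
         * and wait for the I/O if wait != 0 */
        sb->s_dirt = 0;
        return 0;
}

static void toyfs_write_super(struct super_block *sb)
{
        if (!(sb->s_flags & MS_RDONLY))
                toyfs_sync_fs(sb, 1);
        else
                sb->s_dirt = 0;
}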
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 5784a318c883..53786eb5cf60 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h | |||
@@ -144,7 +144,6 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, | |||
144 | extern struct inode *sysv_iget(struct super_block *, unsigned int); | 144 | extern struct inode *sysv_iget(struct super_block *, unsigned int); |
145 | extern int sysv_write_inode(struct inode *, int); | 145 | extern int sysv_write_inode(struct inode *, int); |
146 | extern int sysv_sync_inode(struct inode *); | 146 | extern int sysv_sync_inode(struct inode *); |
147 | extern int sysv_sync_file(struct file *, struct dentry *, int); | ||
148 | extern void sysv_set_inode(struct inode *, dev_t); | 147 | extern void sysv_set_inode(struct inode *, dev_t); |
149 | extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 148 | extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
150 | extern int sysv_init_icache(void); | 149 | extern int sysv_init_icache(void); |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index e9f7a754c4f7..3589eab02a2f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/mount.h> | 36 | #include <linux/mount.h> |
37 | #include <linux/math64.h> | 37 | #include <linux/math64.h> |
38 | #include <linux/writeback.h> | 38 | #include <linux/writeback.h> |
39 | #include <linux/smp_lock.h> | ||
39 | #include "ubifs.h" | 40 | #include "ubifs.h" |
40 | 41 | ||
41 | /* | 42 | /* |
@@ -447,9 +448,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) | |||
447 | if (!wait) | 448 | if (!wait) |
448 | return 0; | 449 | return 0; |
449 | 450 | ||
450 | if (sb->s_flags & MS_RDONLY) | ||
451 | return 0; | ||
452 | |||
453 | /* | 451 | /* |
454 | * VFS calls '->sync_fs()' before synchronizing all dirty inodes and | 452 | * VFS calls '->sync_fs()' before synchronizing all dirty inodes and |
455 | * pages, so synchronize them first, then commit the journal. Strictly | 453 | * pages, so synchronize them first, then commit the journal. Strictly |
@@ -1687,6 +1685,9 @@ static void ubifs_put_super(struct super_block *sb) | |||
1687 | 1685 | ||
1688 | ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, | 1686 | ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, |
1689 | c->vi.vol_id); | 1687 | c->vi.vol_id); |
1688 | |||
1689 | lock_kernel(); | ||
1690 | |||
1690 | /* | 1691 | /* |
1691 | * The following asserts are only valid if there has not been a failure | 1692 | * The following asserts are only valid if there has not been a failure |
1692 | * of the media. For example, there will be dirty inodes if we failed | 1693 | * of the media. For example, there will be dirty inodes if we failed |
@@ -1753,6 +1754,8 @@ static void ubifs_put_super(struct super_block *sb) | |||
1753 | ubi_close_volume(c->ubi); | 1754 | ubi_close_volume(c->ubi); |
1754 | mutex_unlock(&c->umount_mutex); | 1755 | mutex_unlock(&c->umount_mutex); |
1755 | kfree(c); | 1756 | kfree(c); |
1757 | |||
1758 | unlock_kernel(); | ||
1756 | } | 1759 | } |
1757 | 1760 | ||
1758 | static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | 1761 | static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) |
@@ -1768,17 +1771,22 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1768 | return err; | 1771 | return err; |
1769 | } | 1772 | } |
1770 | 1773 | ||
1774 | lock_kernel(); | ||
1771 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { | 1775 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { |
1772 | if (c->ro_media) { | 1776 | if (c->ro_media) { |
1773 | ubifs_msg("cannot re-mount due to prior errors"); | 1777 | ubifs_msg("cannot re-mount due to prior errors"); |
1778 | unlock_kernel(); | ||
1774 | return -EROFS; | 1779 | return -EROFS; |
1775 | } | 1780 | } |
1776 | err = ubifs_remount_rw(c); | 1781 | err = ubifs_remount_rw(c); |
1777 | if (err) | 1782 | if (err) { |
1783 | unlock_kernel(); | ||
1778 | return err; | 1784 | return err; |
1785 | } | ||
1779 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { | 1786 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { |
1780 | if (c->ro_media) { | 1787 | if (c->ro_media) { |
1781 | ubifs_msg("cannot re-mount due to prior errors"); | 1788 | ubifs_msg("cannot re-mount due to prior errors"); |
1789 | unlock_kernel(); | ||
1782 | return -EROFS; | 1790 | return -EROFS; |
1783 | } | 1791 | } |
1784 | ubifs_remount_ro(c); | 1792 | ubifs_remount_ro(c); |
@@ -1793,6 +1801,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1793 | } | 1801 | } |
1794 | 1802 | ||
1795 | ubifs_assert(c->lst.taken_empty_lebs > 0); | 1803 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
1804 | unlock_kernel(); | ||
1796 | return 0; | 1805 | return 0; |
1797 | } | 1806 | } |
1798 | 1807 | ||
diff --git a/fs/udf/Makefile b/fs/udf/Makefile index 0d4503f7446d..eb880f66c23a 100644 --- a/fs/udf/Makefile +++ b/fs/udf/Makefile | |||
@@ -5,5 +5,5 @@ | |||
5 | obj-$(CONFIG_UDF_FS) += udf.o | 5 | obj-$(CONFIG_UDF_FS) += udf.o |
6 | 6 | ||
7 | udf-objs := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \ | 7 | udf-objs := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \ |
8 | partition.o super.o truncate.o symlink.o fsync.o \ | 8 | partition.o super.o truncate.o symlink.o \ |
9 | directory.o misc.o udftime.o unicode.o | 9 | directory.o misc.o udftime.o unicode.o |
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 2efd4d5291b6..61d9a76a3a69 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c | |||
@@ -210,5 +210,5 @@ const struct file_operations udf_dir_operations = { | |||
210 | .read = generic_read_dir, | 210 | .read = generic_read_dir, |
211 | .readdir = udf_readdir, | 211 | .readdir = udf_readdir, |
212 | .ioctl = udf_ioctl, | 212 | .ioctl = udf_ioctl, |
213 | .fsync = udf_fsync_file, | 213 | .fsync = simple_fsync, |
214 | }; | 214 | }; |
diff --git a/fs/udf/file.c b/fs/udf/file.c index eb91f3b70320..7464305382b5 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -209,7 +209,7 @@ const struct file_operations udf_file_operations = { | |||
209 | .write = do_sync_write, | 209 | .write = do_sync_write, |
210 | .aio_write = udf_file_aio_write, | 210 | .aio_write = udf_file_aio_write, |
211 | .release = udf_release_file, | 211 | .release = udf_release_file, |
212 | .fsync = udf_fsync_file, | 212 | .fsync = simple_fsync, |
213 | .splice_read = generic_file_splice_read, | 213 | .splice_read = generic_file_splice_read, |
214 | .llseek = generic_file_llseek, | 214 | .llseek = generic_file_llseek, |
215 | }; | 215 | }; |
diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c deleted file mode 100644 index b2c472b733b8..000000000000 --- a/fs/udf/fsync.c +++ /dev/null | |||
@@ -1,52 +0,0 @@ | |||
1 | /* | ||
2 | * fsync.c | ||
3 | * | ||
4 | * PURPOSE | ||
5 | * Fsync handling routines for the OSTA-UDF(tm) filesystem. | ||
6 | * | ||
7 | * COPYRIGHT | ||
8 | * This file is distributed under the terms of the GNU General Public | ||
9 | * License (GPL). Copies of the GPL can be obtained from: | ||
10 | * ftp://prep.ai.mit.edu/pub/gnu/GPL | ||
11 | * Each contributing author retains all rights to their own work. | ||
12 | * | ||
13 | * (C) 1999-2001 Ben Fennema | ||
14 | * (C) 1999-2000 Stelias Computing Inc | ||
15 | * | ||
16 | * HISTORY | ||
17 | * | ||
18 | * 05/22/99 blf Created. | ||
19 | */ | ||
20 | |||
21 | #include "udfdecl.h" | ||
22 | |||
23 | #include <linux/fs.h> | ||
24 | |||
25 | static int udf_fsync_inode(struct inode *, int); | ||
26 | |||
27 | /* | ||
28 | * File may be NULL when we are called. Perhaps we shouldn't | ||
29 | * even pass file to fsync ? | ||
30 | */ | ||
31 | |||
32 | int udf_fsync_file(struct file *file, struct dentry *dentry, int datasync) | ||
33 | { | ||
34 | struct inode *inode = dentry->d_inode; | ||
35 | |||
36 | return udf_fsync_inode(inode, datasync); | ||
37 | } | ||
38 | |||
39 | static int udf_fsync_inode(struct inode *inode, int datasync) | ||
40 | { | ||
41 | int err; | ||
42 | |||
43 | err = sync_mapping_buffers(inode->i_mapping); | ||
44 | if (!(inode->i_state & I_DIRTY)) | ||
45 | return err; | ||
46 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
47 | return err; | ||
48 | |||
49 | err |= udf_sync_inode(inode); | ||
50 | |||
51 | return err ? -EIO : 0; | ||
52 | } | ||
diff --git a/fs/udf/super.c b/fs/udf/super.c index 72348cc855a4..6832135159b6 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -568,6 +568,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
568 | if (!udf_parse_options(options, &uopt, true)) | 568 | if (!udf_parse_options(options, &uopt, true)) |
569 | return -EINVAL; | 569 | return -EINVAL; |
570 | 570 | ||
571 | lock_kernel(); | ||
571 | sbi->s_flags = uopt.flags; | 572 | sbi->s_flags = uopt.flags; |
572 | sbi->s_uid = uopt.uid; | 573 | sbi->s_uid = uopt.uid; |
573 | sbi->s_gid = uopt.gid; | 574 | sbi->s_gid = uopt.gid; |
@@ -581,13 +582,16 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
581 | *flags |= MS_RDONLY; | 582 | *flags |= MS_RDONLY; |
582 | } | 583 | } |
583 | 584 | ||
584 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 585 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { |
586 | unlock_kernel(); | ||
585 | return 0; | 587 | return 0; |
588 | } | ||
586 | if (*flags & MS_RDONLY) | 589 | if (*flags & MS_RDONLY) |
587 | udf_close_lvid(sb); | 590 | udf_close_lvid(sb); |
588 | else | 591 | else |
589 | udf_open_lvid(sb); | 592 | udf_open_lvid(sb); |
590 | 593 | ||
594 | unlock_kernel(); | ||
591 | return 0; | 595 | return 0; |
592 | } | 596 | } |
593 | 597 | ||
@@ -1915,7 +1919,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1915 | if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) { | 1919 | if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) { |
1916 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); | 1920 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); |
1917 | } else { | 1921 | } else { |
1918 | uopt.blocksize = bdev_hardsect_size(sb->s_bdev); | 1922 | uopt.blocksize = bdev_logical_block_size(sb->s_bdev); |
1919 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); | 1923 | ret = udf_load_vrs(sb, &uopt, silent, &fileset); |
1920 | if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { | 1924 | if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { |
1921 | if (!silent) | 1925 | if (!silent) |
@@ -2062,6 +2066,9 @@ static void udf_put_super(struct super_block *sb) | |||
2062 | struct udf_sb_info *sbi; | 2066 | struct udf_sb_info *sbi; |
2063 | 2067 | ||
2064 | sbi = UDF_SB(sb); | 2068 | sbi = UDF_SB(sb); |
2069 | |||
2070 | lock_kernel(); | ||
2071 | |||
2065 | if (sbi->s_vat_inode) | 2072 | if (sbi->s_vat_inode) |
2066 | iput(sbi->s_vat_inode); | 2073 | iput(sbi->s_vat_inode); |
2067 | if (sbi->s_partitions) | 2074 | if (sbi->s_partitions) |
@@ -2077,6 +2084,8 @@ static void udf_put_super(struct super_block *sb) | |||
2077 | kfree(sbi->s_partmaps); | 2084 | kfree(sbi->s_partmaps); |
2078 | kfree(sb->s_fs_info); | 2085 | kfree(sb->s_fs_info); |
2079 | sb->s_fs_info = NULL; | 2086 | sb->s_fs_info = NULL; |
2087 | |||
2088 | unlock_kernel(); | ||
2080 | } | 2089 | } |
2081 | 2090 | ||
2082 | static int udf_sync_fs(struct super_block *sb, int wait) | 2091 | static int udf_sync_fs(struct super_block *sb, int wait) |
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index cac51b77a5d1..8d46f4294ee7 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h | |||
@@ -223,9 +223,6 @@ extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t, | |||
223 | extern int udf_new_block(struct super_block *, struct inode *, uint16_t, | 223 | extern int udf_new_block(struct super_block *, struct inode *, uint16_t, |
224 | uint32_t, int *); | 224 | uint32_t, int *); |
225 | 225 | ||
226 | /* fsync.c */ | ||
227 | extern int udf_fsync_file(struct file *, struct dentry *, int); | ||
228 | |||
229 | /* directory.c */ | 226 | /* directory.c */ |
230 | extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *, | 227 | extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *, |
231 | struct udf_fileident_bh *, | 228 | struct udf_fileident_bh *, |
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 6321b797061b..6f671f1ac271 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c | |||
@@ -666,6 +666,6 @@ not_empty: | |||
666 | const struct file_operations ufs_dir_operations = { | 666 | const struct file_operations ufs_dir_operations = { |
667 | .read = generic_read_dir, | 667 | .read = generic_read_dir, |
668 | .readdir = ufs_readdir, | 668 | .readdir = ufs_readdir, |
669 | .fsync = ufs_sync_file, | 669 | .fsync = simple_fsync, |
670 | .llseek = generic_file_llseek, | 670 | .llseek = generic_file_llseek, |
671 | }; | 671 | }; |
diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 2bd3a1615714..73655c61240a 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c | |||
@@ -24,31 +24,10 @@ | |||
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/buffer_head.h> /* for sync_mapping_buffers() */ | ||
28 | 27 | ||
29 | #include "ufs_fs.h" | 28 | #include "ufs_fs.h" |
30 | #include "ufs.h" | 29 | #include "ufs.h" |
31 | 30 | ||
32 | |||
33 | int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) | ||
34 | { | ||
35 | struct inode *inode = dentry->d_inode; | ||
36 | int err; | ||
37 | int ret; | ||
38 | |||
39 | ret = sync_mapping_buffers(inode->i_mapping); | ||
40 | if (!(inode->i_state & I_DIRTY)) | ||
41 | return ret; | ||
42 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
43 | return ret; | ||
44 | |||
45 | err = ufs_sync_inode(inode); | ||
46 | if (ret == 0) | ||
47 | ret = err; | ||
48 | return ret; | ||
49 | } | ||
50 | |||
51 | |||
52 | /* | 31 | /* |
53 | * We have mostly NULL's here: the current defaults are ok for | 32 | * We have mostly NULL's here: the current defaults are ok for |
54 | * the ufs filesystem. | 33 | * the ufs filesystem. |
@@ -62,6 +41,6 @@ const struct file_operations ufs_file_operations = { | |||
62 | .aio_write = generic_file_aio_write, | 41 | .aio_write = generic_file_aio_write, |
63 | .mmap = generic_file_mmap, | 42 | .mmap = generic_file_mmap, |
64 | .open = generic_file_open, | 43 | .open = generic_file_open, |
65 | .fsync = ufs_sync_file, | 44 | .fsync = simple_fsync, |
66 | .splice_read = generic_file_splice_read, | 45 | .splice_read = generic_file_splice_read, |
67 | }; | 46 | }; |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 60359291761f..5faed7954d0a 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -263,6 +263,7 @@ void ufs_panic (struct super_block * sb, const char * function, | |||
263 | struct ufs_super_block_first * usb1; | 263 | struct ufs_super_block_first * usb1; |
264 | va_list args; | 264 | va_list args; |
265 | 265 | ||
266 | lock_kernel(); | ||
266 | uspi = UFS_SB(sb)->s_uspi; | 267 | uspi = UFS_SB(sb)->s_uspi; |
267 | usb1 = ubh_get_usb_first(uspi); | 268 | usb1 = ubh_get_usb_first(uspi); |
268 | 269 | ||
@@ -594,6 +595,9 @@ static void ufs_put_super_internal(struct super_block *sb) | |||
594 | 595 | ||
595 | 596 | ||
596 | UFSD("ENTER\n"); | 597 | UFSD("ENTER\n"); |
598 | |||
599 | lock_kernel(); | ||
600 | |||
597 | ufs_put_cstotal(sb); | 601 | ufs_put_cstotal(sb); |
598 | size = uspi->s_cssize; | 602 | size = uspi->s_cssize; |
599 | blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; | 603 | blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; |
@@ -621,6 +625,9 @@ static void ufs_put_super_internal(struct super_block *sb) | |||
621 | brelse (sbi->s_ucg[i]); | 625 | brelse (sbi->s_ucg[i]); |
622 | kfree (sbi->s_ucg); | 626 | kfree (sbi->s_ucg); |
623 | kfree (base); | 627 | kfree (base); |
628 | |||
629 | unlock_kernel(); | ||
630 | |||
624 | UFSD("EXIT\n"); | 631 | UFSD("EXIT\n"); |
625 | } | 632 | } |
626 | 633 | ||
@@ -1118,32 +1125,45 @@ failed_nomem: | |||
1118 | return -ENOMEM; | 1125 | return -ENOMEM; |
1119 | } | 1126 | } |
1120 | 1127 | ||
1121 | static void ufs_write_super(struct super_block *sb) | 1128 | static int ufs_sync_fs(struct super_block *sb, int wait) |
1122 | { | 1129 | { |
1123 | struct ufs_sb_private_info * uspi; | 1130 | struct ufs_sb_private_info * uspi; |
1124 | struct ufs_super_block_first * usb1; | 1131 | struct ufs_super_block_first * usb1; |
1125 | struct ufs_super_block_third * usb3; | 1132 | struct ufs_super_block_third * usb3; |
1126 | unsigned flags; | 1133 | unsigned flags; |
1127 | 1134 | ||
1135 | lock_super(sb); | ||
1128 | lock_kernel(); | 1136 | lock_kernel(); |
1137 | |||
1129 | UFSD("ENTER\n"); | 1138 | UFSD("ENTER\n"); |
1139 | |||
1130 | flags = UFS_SB(sb)->s_flags; | 1140 | flags = UFS_SB(sb)->s_flags; |
1131 | uspi = UFS_SB(sb)->s_uspi; | 1141 | uspi = UFS_SB(sb)->s_uspi; |
1132 | usb1 = ubh_get_usb_first(uspi); | 1142 | usb1 = ubh_get_usb_first(uspi); |
1133 | usb3 = ubh_get_usb_third(uspi); | 1143 | usb3 = ubh_get_usb_third(uspi); |
1134 | 1144 | ||
1135 | if (!(sb->s_flags & MS_RDONLY)) { | 1145 | usb1->fs_time = cpu_to_fs32(sb, get_seconds()); |
1136 | usb1->fs_time = cpu_to_fs32(sb, get_seconds()); | 1146 | if ((flags & UFS_ST_MASK) == UFS_ST_SUN || |
1137 | if ((flags & UFS_ST_MASK) == UFS_ST_SUN | 1147 | (flags & UFS_ST_MASK) == UFS_ST_SUNOS || |
1138 | || (flags & UFS_ST_MASK) == UFS_ST_SUNOS | 1148 | (flags & UFS_ST_MASK) == UFS_ST_SUNx86) |
1139 | || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) | 1149 | ufs_set_fs_state(sb, usb1, usb3, |
1140 | ufs_set_fs_state(sb, usb1, usb3, | 1150 | UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); |
1141 | UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); | 1151 | ufs_put_cstotal(sb); |
1142 | ufs_put_cstotal(sb); | ||
1143 | } | ||
1144 | sb->s_dirt = 0; | 1152 | sb->s_dirt = 0; |
1153 | |||
1145 | UFSD("EXIT\n"); | 1154 | UFSD("EXIT\n"); |
1146 | unlock_kernel(); | 1155 | unlock_kernel(); |
1156 | unlock_super(sb); | ||
1157 | |||
1158 | return 0; | ||
1159 | } | ||
1160 | |||
1161 | static void ufs_write_super(struct super_block *sb) | ||
1162 | { | ||
1163 | if (!(sb->s_flags & MS_RDONLY)) | ||
1164 | ufs_sync_fs(sb, 1); | ||
1165 | else | ||
1166 | sb->s_dirt = 0; | ||
1147 | } | 1167 | } |
1148 | 1168 | ||
1149 | static void ufs_put_super(struct super_block *sb) | 1169 | static void ufs_put_super(struct super_block *sb) |
@@ -1152,6 +1172,9 @@ static void ufs_put_super(struct super_block *sb) | |||
1152 | 1172 | ||
1153 | UFSD("ENTER\n"); | 1173 | UFSD("ENTER\n"); |
1154 | 1174 | ||
1175 | if (sb->s_dirt) | ||
1176 | ufs_write_super(sb); | ||
1177 | |||
1155 | if (!(sb->s_flags & MS_RDONLY)) | 1178 | if (!(sb->s_flags & MS_RDONLY)) |
1156 | ufs_put_super_internal(sb); | 1179 | ufs_put_super_internal(sb); |
1157 | 1180 | ||
@@ -1171,7 +1194,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1171 | struct ufs_super_block_third * usb3; | 1194 | struct ufs_super_block_third * usb3; |
1172 | unsigned new_mount_opt, ufstype; | 1195 | unsigned new_mount_opt, ufstype; |
1173 | unsigned flags; | 1196 | unsigned flags; |
1174 | 1197 | ||
1198 | lock_kernel(); | ||
1199 | lock_super(sb); | ||
1175 | uspi = UFS_SB(sb)->s_uspi; | 1200 | uspi = UFS_SB(sb)->s_uspi; |
1176 | flags = UFS_SB(sb)->s_flags; | 1201 | flags = UFS_SB(sb)->s_flags; |
1177 | usb1 = ubh_get_usb_first(uspi); | 1202 | usb1 = ubh_get_usb_first(uspi); |
@@ -1184,17 +1209,24 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1184 | ufstype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE; | 1209 | ufstype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE; |
1185 | new_mount_opt = 0; | 1210 | new_mount_opt = 0; |
1186 | ufs_set_opt (new_mount_opt, ONERROR_LOCK); | 1211 | ufs_set_opt (new_mount_opt, ONERROR_LOCK); |
1187 | if (!ufs_parse_options (data, &new_mount_opt)) | 1212 | if (!ufs_parse_options (data, &new_mount_opt)) { |
1213 | unlock_super(sb); | ||
1214 | unlock_kernel(); | ||
1188 | return -EINVAL; | 1215 | return -EINVAL; |
1216 | } | ||
1189 | if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { | 1217 | if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { |
1190 | new_mount_opt |= ufstype; | 1218 | new_mount_opt |= ufstype; |
1191 | } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { | 1219 | } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { |
1192 | printk("ufstype can't be changed during remount\n"); | 1220 | printk("ufstype can't be changed during remount\n"); |
1221 | unlock_super(sb); | ||
1222 | unlock_kernel(); | ||
1193 | return -EINVAL; | 1223 | return -EINVAL; |
1194 | } | 1224 | } |
1195 | 1225 | ||
1196 | if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { | 1226 | if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { |
1197 | UFS_SB(sb)->s_mount_opt = new_mount_opt; | 1227 | UFS_SB(sb)->s_mount_opt = new_mount_opt; |
1228 | unlock_super(sb); | ||
1229 | unlock_kernel(); | ||
1198 | return 0; | 1230 | return 0; |
1199 | } | 1231 | } |
1200 | 1232 | ||
@@ -1219,6 +1251,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1219 | #ifndef CONFIG_UFS_FS_WRITE | 1251 | #ifndef CONFIG_UFS_FS_WRITE |
1220 | printk("ufs was compiled with read-only support, " | 1252 | printk("ufs was compiled with read-only support, " |
1221 | "can't be mounted as read-write\n"); | 1253 | "can't be mounted as read-write\n"); |
1254 | unlock_super(sb); | ||
1255 | unlock_kernel(); | ||
1222 | return -EINVAL; | 1256 | return -EINVAL; |
1223 | #else | 1257 | #else |
1224 | if (ufstype != UFS_MOUNT_UFSTYPE_SUN && | 1258 | if (ufstype != UFS_MOUNT_UFSTYPE_SUN && |
@@ -1227,16 +1261,22 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1227 | ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && | 1261 | ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && |
1228 | ufstype != UFS_MOUNT_UFSTYPE_UFS2) { | 1262 | ufstype != UFS_MOUNT_UFSTYPE_UFS2) { |
1229 | printk("this ufstype is read-only supported\n"); | 1263 | printk("this ufstype is read-only supported\n"); |
1264 | unlock_super(sb); | ||
1265 | unlock_kernel(); | ||
1230 | return -EINVAL; | 1266 | return -EINVAL; |
1231 | } | 1267 | } |
1232 | if (!ufs_read_cylinder_structures(sb)) { | 1268 | if (!ufs_read_cylinder_structures(sb)) { |
1233 | printk("failed during remounting\n"); | 1269 | printk("failed during remounting\n"); |
1270 | unlock_super(sb); | ||
1271 | unlock_kernel(); | ||
1234 | return -EPERM; | 1272 | return -EPERM; |
1235 | } | 1273 | } |
1236 | sb->s_flags &= ~MS_RDONLY; | 1274 | sb->s_flags &= ~MS_RDONLY; |
1237 | #endif | 1275 | #endif |
1238 | } | 1276 | } |
1239 | UFS_SB(sb)->s_mount_opt = new_mount_opt; | 1277 | UFS_SB(sb)->s_mount_opt = new_mount_opt; |
1278 | unlock_super(sb); | ||
1279 | unlock_kernel(); | ||
1240 | return 0; | 1280 | return 0; |
1241 | } | 1281 | } |
1242 | 1282 | ||
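The ufs_remount() hunk above adds an unlock_super()/unlock_kernel() pair before every early return. An alternative shape with a single exit label keeps the pairing in one place; a sketch with a hypothetical option parser:

static int toyfs_remount(struct super_block *sb, int *flags, char *data)
{
        int err = 0;

        lock_kernel();
        lock_super(sb);

        if (!toyfs_parse_options(data)) {       /* hypothetical helper */
                err = -EINVAL;
                goto out;
        }
        /* ... apply the new mount options ... */
out:
        unlock_super(sb);
        unlock_kernel();
        return err;
}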
@@ -1352,6 +1392,7 @@ static const struct super_operations ufs_super_ops = { | |||
1352 | .delete_inode = ufs_delete_inode, | 1392 | .delete_inode = ufs_delete_inode, |
1353 | .put_super = ufs_put_super, | 1393 | .put_super = ufs_put_super, |
1354 | .write_super = ufs_write_super, | 1394 | .write_super = ufs_write_super, |
1395 | .sync_fs = ufs_sync_fs, | ||
1355 | .statfs = ufs_statfs, | 1396 | .statfs = ufs_statfs, |
1356 | .remount_fs = ufs_remount, | 1397 | .remount_fs = ufs_remount, |
1357 | .show_options = ufs_show_options, | 1398 | .show_options = ufs_show_options, |
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index d0c4acd4f1f3..644e77e13599 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h | |||
@@ -99,7 +99,6 @@ extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, | |||
99 | extern const struct inode_operations ufs_file_inode_operations; | 99 | extern const struct inode_operations ufs_file_inode_operations; |
100 | extern const struct file_operations ufs_file_operations; | 100 | extern const struct file_operations ufs_file_operations; |
101 | extern const struct address_space_operations ufs_aops; | 101 | extern const struct address_space_operations ufs_aops; |
102 | extern int ufs_sync_file(struct file *, struct dentry *, int); | ||
103 | 102 | ||
104 | /* ialloc.c */ | 103 | /* ialloc.c */ |
105 | extern void ufs_free_inode (struct inode *inode); | 104 | extern void ufs_free_inode (struct inode *inode); |
diff --git a/fs/xattr.c b/fs/xattr.c index d51b8f9db921..1c3d0af59ddf 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, | |||
297 | return error; | 297 | return error; |
298 | dentry = f->f_path.dentry; | 298 | dentry = f->f_path.dentry; |
299 | audit_inode(NULL, dentry); | 299 | audit_inode(NULL, dentry); |
300 | error = mnt_want_write(f->f_path.mnt); | 300 | error = mnt_want_write_file(f); |
301 | if (!error) { | 301 | if (!error) { |
302 | error = setxattr(dentry, name, value, size, flags); | 302 | error = setxattr(dentry, name, value, size, flags); |
303 | mnt_drop_write(f->f_path.mnt); | 303 | mnt_drop_write(f->f_path.mnt); |
@@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) | |||
524 | return error; | 524 | return error; |
525 | dentry = f->f_path.dentry; | 525 | dentry = f->f_path.dentry; |
526 | audit_inode(NULL, dentry); | 526 | audit_inode(NULL, dentry); |
527 | error = mnt_want_write(f->f_path.mnt); | 527 | error = mnt_want_write_file(f); |
528 | if (!error) { | 528 | if (!error) { |
529 | error = removexattr(dentry, name); | 529 | error = removexattr(dentry, name); |
530 | mnt_drop_write(f->f_path.mnt); | 530 | mnt_drop_write(f->f_path.mnt); |
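Both xattr syscall sites switch from mnt_want_write() on the raw vfsmount to the file-based mnt_want_write_file() wrapper. The point of the wrapper is that a file already opened for writing holds a write reference on its mount from open time, so the helper can usually take a cheaper clone reference instead of a full one. Paraphrased from memory of fs/namespace.c in this generation, it looks roughly like:

    int mnt_want_write_file(struct file *file)
    {
            struct inode *inode = file->f_path.dentry->d_inode;

            /* files not open for write (or on special nodes) still
             * need the full-strength write reference */
            if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
                    return mnt_want_write(file->f_path.mnt);
            return mnt_clone_write(file->f_path.mnt);
    }

Note the release side is unchanged: both hunks keep mnt_drop_write(f->f_path.mnt), which pairs with either acquisition variant.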
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e28800a9f2b5..1418b916fc27 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -1501,7 +1501,7 @@ xfs_setsize_buftarg_early( | |||
1501 | struct block_device *bdev) | 1501 | struct block_device *bdev) |
1502 | { | 1502 | { |
1503 | return xfs_setsize_buftarg_flags(btp, | 1503 | return xfs_setsize_buftarg_flags(btp, |
1504 | PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0); | 1504 | PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); |
1505 | } | 1505 | } |
1506 | 1506 | ||
1507 | int | 1507 | int |
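bdev_hardsect_size() was renamed to bdev_logical_block_size() across the tree to match the newer logical/physical block size terminology; the value XFS feeds to xfs_setsize_buftarg_flags() is unchanged. Quoted approximately from include/linux/blkdev.h of this generation:

    static inline unsigned short
    bdev_logical_block_size(struct block_device *bdev)
    {
            return queue_logical_block_size(bdev_get_queue(bdev));
    }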
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 36fb8a657c55..2e09efbca8db 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1121,15 +1121,6 @@ xfs_fs_put_super( | |||
1121 | kfree(mp); | 1121 | kfree(mp); |
1122 | } | 1122 | } |
1123 | 1123 | ||
1124 | STATIC void | ||
1125 | xfs_fs_write_super( | ||
1126 | struct super_block *sb) | ||
1127 | { | ||
1128 | if (!(sb->s_flags & MS_RDONLY)) | ||
1129 | xfs_sync_fsdata(XFS_M(sb), 0); | ||
1130 | sb->s_dirt = 0; | ||
1131 | } | ||
1132 | |||
1133 | STATIC int | 1124 | STATIC int |
1134 | xfs_fs_sync_super( | 1125 | xfs_fs_sync_super( |
1135 | struct super_block *sb, | 1126 | struct super_block *sb, |
@@ -1154,7 +1145,6 @@ xfs_fs_sync_super( | |||
1154 | error = xfs_quiesce_data(mp); | 1145 | error = xfs_quiesce_data(mp); |
1155 | else | 1146 | else |
1156 | error = xfs_sync_fsdata(mp, 0); | 1147 | error = xfs_sync_fsdata(mp, 0); |
1157 | sb->s_dirt = 0; | ||
1158 | 1148 | ||
1159 | if (unlikely(laptop_mode)) { | 1149 | if (unlikely(laptop_mode)) { |
1160 | int prev_sync_seq = mp->m_sync_seq; | 1150 | int prev_sync_seq = mp->m_sync_seq; |
@@ -1461,7 +1451,6 @@ xfs_fs_fill_super( | |||
1461 | 1451 | ||
1462 | XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); | 1452 | XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); |
1463 | 1453 | ||
1464 | sb->s_dirt = 1; | ||
1465 | sb->s_magic = XFS_SB_MAGIC; | 1454 | sb->s_magic = XFS_SB_MAGIC; |
1466 | sb->s_blocksize = mp->m_sb.sb_blocksize; | 1455 | sb->s_blocksize = mp->m_sb.sb_blocksize; |
1467 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; | 1456 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; |
@@ -1549,7 +1538,6 @@ static struct super_operations xfs_super_operations = { | |||
1549 | .write_inode = xfs_fs_write_inode, | 1538 | .write_inode = xfs_fs_write_inode, |
1550 | .clear_inode = xfs_fs_clear_inode, | 1539 | .clear_inode = xfs_fs_clear_inode, |
1551 | .put_super = xfs_fs_put_super, | 1540 | .put_super = xfs_fs_put_super, |
1552 | .write_super = xfs_fs_write_super, | ||
1553 | .sync_fs = xfs_fs_sync_super, | 1541 | .sync_fs = xfs_fs_sync_super, |
1554 | .freeze_fs = xfs_fs_freeze, | 1542 | .freeze_fs = xfs_fs_freeze, |
1555 | .statfs = xfs_fs_statfs, | 1543 | .statfs = xfs_fs_statfs, |
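The xfs_super.c hunks remove xfs_fs_write_super() and every s_dirt manipulation: once ->sync_fs (and XFS's own xfssyncd) covers periodic writeback, keeping the VFS dirty flag set only triggers redundant ->write_super calls. The mechanism being opted out of is the VFS superblock writeback loop; a heavily simplified paraphrase of fs/super.c in this generation, with locking and reference handling omitted:

    void sync_supers(void)
    {
            struct super_block *sb;

            list_for_each_entry(sb, &super_blocks, s_list) {
                    /* a filesystem that never sets s_dirt and has no
                     * ->write_super is simply skipped here */
                    if (sb->s_op->write_super && sb->s_dirt)
                            sb->s_op->write_super(sb);
            }
    }

The xfs_trans.c hunk that follows removes the last writer of s_dirt (previously set after logging superblock deltas), completing the opt-out.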
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fffabc0aefcb..66b849358e62 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -628,8 +628,6 @@ xfs_trans_apply_sb_deltas( | |||
628 | xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), | 628 | xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), |
629 | offsetof(xfs_dsb_t, sb_frextents) + | 629 | offsetof(xfs_dsb_t, sb_frextents) + |
630 | sizeof(sbp->sb_frextents) - 1); | 630 | sizeof(sbp->sb_frextents) - 1); |
631 | |||
632 | tp->t_mountp->m_super->s_dirt = 1; | ||
633 | } | 631 | } |
634 | 632 | ||
635 | /* | 633 | /* |